mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
I210ToARGB conversion from 10 bit YUV to RGB
SSSE3 optimized 10 bit YUV conversion to ARGB in a single step.

Bug: libyuv:751
Test: I010ToARGB
Change-Id: I234b2850e35992113ee6bd638732bafc7010a60d
Reviewed-on: https://chromium-review.googlesource.com/848238
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
ac088b4be9
commit
a64658593e
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1686
|
||||
Version: 1687
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -63,6 +63,32 @@ int I420ToABGR(const uint8* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to ARGB.
|
||||
LIBYUV_API
|
||||
int I010ToARGB(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to ARGB.
|
||||
LIBYUV_API
|
||||
int H010ToARGB(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to ARGB.
|
||||
LIBYUV_API
|
||||
int I422ToARGB(const uint8* src_y,
|
||||
|
||||
@ -265,6 +265,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOAR30ROW_SSSE3
|
||||
#define HAS_CONVERT16TO8ROW_SSSE3
|
||||
#define HAS_CONVERT8TO16ROW_SSE2
|
||||
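// The I210 row functions are the ones used for H010. In these names the
// 2 stands for 4:2:2, and the leading letter selects the color matrix:
// I for 601, H for 709.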
|
||||
#define HAS_I210TOARGBROW_SSSE3
|
||||
#define HAS_MERGERGBROW_SSSE3
|
||||
#define HAS_SPLITRGBROW_SSSE3
|
||||
#endif
|
||||
@ -1735,9 +1737,9 @@ void I422ToARGBRow_C(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
void I210ToARGBRow_C(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
@ -1807,12 +1809,6 @@ void I422ToARGBRow_AVX2(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1849,6 +1845,13 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I210ToARGBRow_SSSE3(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -1863,12 +1866,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
@ -1999,6 +1996,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I210ToARGBRow_Any_SSSE3(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1686
|
||||
#define LIBYUV_VERSION 1687
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -47,7 +47,7 @@ int ARGBCopy(const uint8* src_argb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I422 to ARGB with matrix
|
||||
// Convert I420 to ARGB with matrix
|
||||
static int I420ToARGBMatrix(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
@ -573,18 +573,13 @@ static int H010ToARGBMatrix(const uint16* src_y,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int scale, // 16384 for 10 bits
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale,
|
||||
int width) = Convert16To8Row_C;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf,
|
||||
const uint16* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I422ToARGBRow_C;
|
||||
|
||||
I210ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -594,85 +589,23 @@ static int H010ToARGBMatrix(const uint16* src_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
|
||||
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
|
||||
#if defined(HAS_I210TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
Convert16To8Row = Convert16To8Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_CONVERT16TO8ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
Convert16To8Row = Convert16To8Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
I210ToARGBRow = I210ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MSA;
|
||||
I210ToARGBRow = I210ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
// Row buffers for 8 bit YUV.
|
||||
align_buffer_64(row_buf, width + halfwidth * 2);
|
||||
uint8* row_y = row_buf;
|
||||
uint8* row_u = row_buf + width;
|
||||
uint8* row_v = row_buf + width + halfwidth;
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
Convert16To8Row(src_y, row_y, scale, width);
|
||||
Convert16To8Row(src_u, row_u, scale, halfwidth);
|
||||
Convert16To8Row(src_v, row_v, scale, halfwidth);
|
||||
I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);
|
||||
|
||||
Convert16To8Row(src_y + src_stride_y, row_y, scale, width);
|
||||
I422ToARGBRow(row_y, row_u, row_v, dst_argb + dst_stride_argb,
|
||||
yuvconstants, width);
|
||||
dst_argb += dst_stride_argb * 2;
|
||||
src_y += src_stride_y * 2;
|
||||
for (y = 0; y < height; ++y) {
|
||||
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
|
||||
if (height & 1) {
|
||||
Convert16To8Row(src_y, row_y, scale, width);
|
||||
Convert16To8Row(src_u, row_u, scale, halfwidth);
|
||||
Convert16To8Row(src_v, row_v, scale, halfwidth);
|
||||
I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);
|
||||
}
|
||||
free_aligned_buffer_64(row_buf);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -691,7 +624,7 @@ int H010ToARGB(const uint16* src_y,
|
||||
int height) {
|
||||
return H010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_argb, dst_stride_argb,
|
||||
&kYuvH709Constants, 16384, width, height);
|
||||
&kYuvH709Constants, width, height);
|
||||
}
|
||||
|
||||
// Convert I444 to ARGB with matrix
|
||||
|
||||
@ -194,6 +194,32 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
|
||||
#endif
|
||||
#undef ANY31C
|
||||
|
||||
// 64 byte per row for future AVX2
// Any 3 planes of 16 bit to 1 with yuvconstants
// TODO(fbarchard): consider
//
// ANY31CT(NAMEANY, ANY_SIMD, ...) defines NAMEANY, a wrapper that lets the
// row function ANY_SIMD be used with any width:
//   1. The leading (width & ~MASK) pixels are converted directly by ANY_SIMD.
//   2. The trailing (width & MASK) pixels are copied into zero-filled scratch
//      planes, converted as one full group of MASK + 1 pixels into a scratch
//      output, and only the bytes belonging to real pixels are copied to
//      dst_ptr.
// Macro parameters:
//   UVSHIFT   right shift applied to the pixel index when addressing u/v.
//   DUVSHIFT  right shift applied to the pixel index when addressing dst_ptr.
//   T         element type of the three source planes.
//   SBPP      bytes per source element (used for memset/memcpy sizes).
//   BPP       bytes per destination pixel.
//   MASK      group size of ANY_SIMD minus one.
// SS() and SIMD_ALIGNED() are helpers defined elsewhere in this file.
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) {           \
    SIMD_ALIGNED(T scratch[16 * 3]);                                           \
    SIMD_ALIGNED(uint8 converted[64]);                                         \
    const int tail = width & MASK;                                             \
    const int body = width & ~MASK;                                            \
    T* const scratch_y = scratch;                                              \
    T* const scratch_u = scratch + 16;                                         \
    T* const scratch_v = scratch + 32;                                         \
    memset(scratch, 0, 16 * 3 * SBPP); /* for YUY2 and msan */                 \
    if (body > 0) {                                                            \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body);              \
    }                                                                          \
    memcpy(scratch_y, y_buf + body, tail * SBPP);                              \
    memcpy(scratch_u, u_buf + (body >> UVSHIFT), SS(tail, UVSHIFT) * SBPP);    \
    memcpy(scratch_v, v_buf + (body >> UVSHIFT), SS(tail, UVSHIFT) * SBPP);    \
    ANY_SIMD(scratch_y, scratch_u, scratch_v, converted, yuvconstants,         \
             MASK + 1);                                                        \
    memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, converted,                      \
           SS(tail, DUVSHIFT) * BPP);                                          \
  }

#ifdef HAS_I210TOARGBROW_SSSE3
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7)
#endif
#undef ANY31CT
|
||||
|
||||
// Any 2 planes to 1.
|
||||
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
|
||||
|
||||
@ -1295,6 +1295,51 @@ static __inline void YuvPixel(uint8 y,
|
||||
*r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV 10 bit assembly.
|
||||
static __inline void YuvPixel10(uint16 y,
|
||||
uint16 u,
|
||||
uint16 v,
|
||||
uint8* b,
|
||||
uint8* g,
|
||||
uint8* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
#if defined(__aarch64__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = -yuvconstants->kUVToRB[1];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
#elif defined(__arm__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[4];
|
||||
int vr = -yuvconstants->kUVToRB[4];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
#else
|
||||
int ub = yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = yuvconstants->kUVToR[1];
|
||||
int bb = yuvconstants->kUVBiasB[0];
|
||||
int bg = yuvconstants->kUVBiasG[0];
|
||||
int br = yuvconstants->kUVBiasR[0];
|
||||
int yg = yuvconstants->kYToRgb[0];
|
||||
#endif
|
||||
|
||||
uint32 y1 = (uint32)((y << 6) * yg) >> 16;
|
||||
u = clamp255(u >> 2);
|
||||
v = clamp255(v >> 2);
|
||||
*b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
|
||||
*g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
|
||||
*r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
|
||||
}
|
||||
|
||||
// Y contribution to R,G,B. Scale and bias.
|
||||
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
|
||||
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
|
||||
@ -1388,6 +1433,33 @@ void I422ToARGBRow_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// 10 bit YUV to ARGB
|
||||
void I210ToARGBRow_C(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
void I422AlphaToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -1623,6 +1623,20 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
|
||||
"punpcklbw %%xmm4,%%xmm4 \n" \
|
||||
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 4 UV from 422 10 bit, upsample to 8 UV
// Asm fragment for one loop iteration of a 10 bit 4:2:2 row converter.
// Steps, in instruction order:
//   - movq loads 8 bytes (4 x 16 bit U) from [u_buf] into xmm0.
//   - MEMOPREG(movq, ...) loads 8 bytes of V into xmm1; it is given both
//     [u_buf] and [v_buf] (MEMOPREG is defined elsewhere - presumably a
//     base + index access, so v_buf must hold an offset from u_buf; confirm).
//   - u_buf advances by 8 bytes.
//   - punpcklwd interleaves the U and V words in xmm0.
//   - psraw $2 shifts each word right by 2 (10 bit -> 8 bit range).
//   - packuswb packs the words to bytes with unsigned saturation.
//   - punpcklwd of xmm0 with itself duplicates each UV byte pair, giving
//     8 UV pairs from 4.
//   - movdqu loads 16 bytes (8 x 16 bit Y) from [y_buf] into xmm4, psllw $6
//     shifts each Y word left by 6, and y_buf advances by 16 bytes.
|
||||
// TODO(fbarchard): Consider shufb to replace pack/unpack
|
||||
#define READYUV422_10 \
|
||||
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
||||
"lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
|
||||
"punpcklwd %%xmm1,%%xmm0 \n" \
|
||||
"psraw $0x2,%%xmm0 \n" \
|
||||
"packuswb %%xmm0,%%xmm0 \n" \
|
||||
"punpcklwd %%xmm0,%%xmm0 \n" \
|
||||
"movdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||
"psllw $0x6,%%xmm4 \n" \
|
||||
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
|
||||
#define READYUVA422 \
|
||||
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||
@ -1862,6 +1876,36 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// 10 bit YUV to ARGB
// Inline-asm row function: converts 10 bit YUV samples (stored in uint16) to
// ARGB, 8 pixels per loop iteration.
//   y_buf, u_buf, v_buf  source planes.
//   dst_argb             destination row.
//   yuvconstants         conversion constants passed to the YUVTORGB macros.
//   width                pixel count; the loop body runs once before width
//                        is tested and then repeats while the remaining
//                        count is > 0, so it works in whole groups of 8.
// YUVTORGB_SETUP, YUVTORGB and STOREARGB are macros defined elsewhere in
// this file.
|
||||
void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf,
|
||||
const uint16* u_buf,
|
||||
const uint16* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
// Turn v_buf into an offset from u_buf; READYUV422_10 addresses V through
// both [u_buf] and [v_buf].
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
// xmm5 = all ones. NOTE(review): presumably the alpha bytes consumed by
// STOREARGB - that macro is not visible here; confirm.
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_10
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
// 8 pixels done; loop while the remaining width is still positive.
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14 YUVTORGB_REGS
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
|
||||
void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user