diff --git a/README.chromium b/README.chromium index 8845bc851..89407babb 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1686 +Version: 1687 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 5998649df..49838ce73 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -63,6 +63,32 @@ int I420ToABGR(const uint8* src_y, int width, int height); +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_argb, + int dst_stride_argb, + int width, + int height); + // Convert I422 to ARGB. LIBYUV_API int I422ToARGB(const uint8* src_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 9131111ad..992d2ceb5 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -265,6 +265,8 @@ extern "C" { #define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 +// I210 is for H010. 2 = 422. I for 601 vs H for 709. +#define HAS_I210TOARGBROW_SSSE3 #define HAS_MERGERGBROW_SSSE3 #define HAS_SPLITRGBROW_SSSE3 #endif @@ -1735,9 +1737,9 @@ void I422ToARGBRow_C(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, +void I210ToARGBRow_C(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); @@ -1807,12 +1809,6 @@ void I422ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1849,6 +1845,13 @@ void I422ToARGBRow_SSSE3(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); + +void I210ToARGBRow_SSSE3(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -1863,12 +1866,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, @@ -1999,6 +1996,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I210ToARGBRow_Any_SSSE3(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 358152397..5d0639528 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1686 +#define LIBYUV_VERSION 1687 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 5858ec25e..b166ceed0 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -47,7 +47,7 @@ int ARGBCopy(const uint8* src_argb, return 0; } -// Convert I422 to ARGB with matrix +// Convert I420 to ARGB with matrix static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y, const uint8* src_u, @@ -573,18 +573,13 @@ static int H010ToARGBMatrix(const uint16* src_y, uint8* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, - int scale, // 16384 for 10 bits int width, int height) { int y; - int halfwidth = (width + 1) >> 1; - void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale, - int width) = Convert16To8Row_C; - void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, - const uint8* v_buf, uint8* rgb_buf, + void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf, + const uint16* v_buf, uint8* rgb_buf, const struct YuvConstants* yuvconstants, int width) = - I422ToARGBRow_C; - + I210ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -594,85 +589,23 @@ static int H010ToARGBMatrix(const uint16* src_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } - -#if defined(HAS_CONVERT16TO8ROW_SSSE3) +#if defined(HAS_I210TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Convert16To8Row = Convert16To8Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - Convert16To8Row = Convert16To8Row_SSSE3; - } - } -#endif -#if defined(HAS_CONVERT16TO8ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - Convert16To8Row = Convert16To8Row_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - Convert16To8Row = Convert16To8Row_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; + I210ToARGBRow = I210ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGBRow = I422ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_MSA; + I210ToARGBRow = I210ToARGBRow_SSSE3; } } #endif - { - // Row buffers for 8 bit YUV. - align_buffer_64(row_buf, width + halfwidth * 2); - uint8* row_y = row_buf; - uint8* row_u = row_buf + width; - uint8* row_v = row_buf + width + halfwidth; - - for (y = 0; y < height - 1; y += 2) { - Convert16To8Row(src_y, row_y, scale, width); - Convert16To8Row(src_u, row_u, scale, halfwidth); - Convert16To8Row(src_v, row_v, scale, halfwidth); - I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width); - - Convert16To8Row(src_y + src_stride_y, row_y, scale, width); - I422ToARGBRow(row_y, row_u, row_v, dst_argb + dst_stride_argb, - yuvconstants, width); - dst_argb += dst_stride_argb * 2; - src_y += src_stride_y * 2; + for (y = 0; y < height; ++y) { + I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } - - if (height & 1) { - Convert16To8Row(src_y, row_y, scale, width); - Convert16To8Row(src_u, row_u, scale, halfwidth); - Convert16To8Row(src_v, row_v, scale, halfwidth); - I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width); - } - free_aligned_buffer_64(row_buf); } return 0; } @@ -691,7 +624,7 @@ int H010ToARGB(const uint16* src_y, int height) { return H010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, - &kYuvH709Constants, 16384, width, height); + &kYuvH709Constants, width, height); } // Convert I444 to ARGB with matrix diff --git a/source/row_any.cc b/source/row_any.cc index 4dda9099b..721ff26c1 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -194,6 +194,32 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) #endif #undef ANY31C +// 64 byte per row for future AVX2 +// Any 3 planes of 16 bit to 1 with yuvconstants +// TODO(fbarchard): consider +#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ + void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(T temp[16 * 3]); \ + SIMD_ALIGNED(uint8 out[64]); \ + memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r * SBPP); \ + memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ + } + +#ifdef HAS_I210TOARGBROW_SSSE3 +ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7) +#endif +#undef ANY31CT + // Any 2 planes to 1. #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \ diff --git a/source/row_common.cc b/source/row_common.cc index e5fef5d92..a0ca90b8a 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1295,6 +1295,51 @@ static __inline void YuvPixel(uint8 y, *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); } +// C reference code that mimics the YUV 10 bit assembly. +static __inline void YuvPixel10(uint16 y, + uint16 u, + uint16 v, + uint8* b, + uint8* g, + uint8* r, + const struct YuvConstants* yuvconstants) { +#if defined(__aarch64__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = -yuvconstants->kUVToRB[1]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#elif defined(__arm__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[4]; + int vr = -yuvconstants->kUVToRB[4]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#else + int ub = yuvconstants->kUVToB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = yuvconstants->kUVToR[1]; + int bb = yuvconstants->kUVBiasB[0]; + int bg = yuvconstants->kUVBiasG[0]; + int br = yuvconstants->kUVBiasR[0]; + int yg = yuvconstants->kYToRgb[0]; +#endif + + uint32 y1 = (uint32)((y << 6) * yg) >> 16; + u = clamp255(u >> 2); + v = clamp255(v >> 2); + *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6); + *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); + *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6); +} + // Y contribution to R,G,B. Scale and bias. #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ @@ -1388,6 +1433,33 @@ void I422ToARGBRow_C(const uint8* src_y, } } +// 10 bit YUV to ARGB +void I210ToARGBRow_C(const uint16* src_y, + const uint16* src_u, + const uint16* src_v, + uint8* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = 255; + YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); + rgb_buf[7] = 255; + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = 255; + } +} + void I422AlphaToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 59b4b726d..d4f673af8 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1623,6 +1623,20 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, "punpcklbw %%xmm4,%%xmm4 \n" \ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +// Read 4 UV from 422 10 bit, upsample to 8 UV +// TODO(fbarchard): Consider shufb to replace pack/unpack +#define READYUV422_10 \ + "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ + MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ + "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ + "punpcklwd %%xmm1,%%xmm0 \n" \ + "psraw $0x2,%%xmm0 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ + "psllw $0x6,%%xmm4 \n" \ + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" + // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. #define READYUVA422 \ "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ @@ -1862,6 +1876,36 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, ); } +// 10 bit YUV to ARGB +void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf, + const uint16* u_buf, + const uint16* v_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + + LABELALIGN + "1: \n" + READYUV422_10 + YUVTORGB(yuvconstants) + STOREARGB + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", NACL_R14 YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} + #ifdef HAS_I422ALPHATOARGBROW_SSSE3 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf,