diff --git a/README.chromium b/README.chromium index 9d669256e..cc1d8e9db 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1415 +Version: 1416 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index e36b4dfa2..1df53200d 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -143,6 +143,12 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, int width, int height); +// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB) +LIBYUV_API +int ARGBToG(const uint8* src_argb, int src_stride_argb, + uint8* dst_g, int dst_stride_g, + int width, int height); + // Convert ARGB To NV12. LIBYUV_API int ARGBToNV12(const uint8* src_argb, int src_stride_argb, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 40577ba4d..cccb47827 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -70,7 +70,6 @@ extern "C" { #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 -#define HAS_ARGBTOBAYERGGROW_SSE2 #define HAS_ARGBTORAWROW_SSSE3 #define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB565ROW_SSE2 @@ -271,7 +270,6 @@ extern "C" { #define HAS_ARGB4444TOYROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON -#define HAS_ARGBTOBAYERGGROW_NEON #define HAS_ARGBTORAWROW_NEON #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565ROW_NEON @@ -1632,17 +1630,6 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); -void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); - void I422ToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index f1a7df250..dcdef33b5 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1415 +#define LIBYUV_VERSION 1416 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 08b9a215b..2bac2932e 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1976,8 +1976,8 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, const uint8* src_sobely, uint8* dst, int width)) { int y; - void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) = ARGBToBayerGGRow_C; + void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) = + ARGBToYJRow_C; void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width) = SobelYRow_C; void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, @@ -1993,31 +1993,32 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } - // ARGBToBayer used to select G channel from ARGB. -#if defined(HAS_ARGBTOBAYERGGROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerGGRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBTOBAYERROW_SSSE3) + +#if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_SSSE3; + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOBAYERGGROW_NEON) +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; + ARGBToYJRow = ARGBToYJRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerGGRow_NEON; + ARGBToYJRow = ARGBToYJRow_NEON; } } #endif + #if defined(HAS_SOBELYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelYRow = SobelYRow_SSE2; @@ -2050,20 +2051,20 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, uint8* row_y0 = row_y + kEdge; uint8* row_y1 = row_y0 + kRowSize; uint8* row_y2 = row_y1 + kRowSize; - ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width); + ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. - ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width); + ARGBToYJRow(src_argb, row_y1, width); row_y1[-1] = row_y1[0]; memset(row_y1 + width, row_y1[width - 1], 16); memset(row_y2 + width, 0, 16); for (y = 0; y < height; ++y) { - // Convert next row of ARGB to Y. + // Convert next row of ARGB to G. if (y < (height - 1)) { src_argb += src_stride_argb; } - ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width); + ARGBToYJRow(src_argb, row_y2, width); row_y2[-1] = row_y2[0]; row_y2[width] = row_y2[width - 1]; diff --git a/source/row_any.cc b/source/row_any.cc index c0011cd56..b338ca4c1 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -278,27 +278,6 @@ RGBDANY(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, #endif #undef RGBDANY -// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst. -#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src, uint8* dst, uint32 selector, int width) { \ - int n = width & ~MASK; \ - if (n > 0) { \ - ARGBTORGB_SIMD(src, dst, selector, n); \ - } \ - ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK); \ - } - -#if defined(HAS_ARGBTOBAYERGGROW_SSE2) -BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C, - 4, 1, 7) -#endif -#if defined(HAS_ARGBTOBAYERGGROW_NEON) -BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C, - 4, 1, 7) -#endif - -#undef BAYERANY - #define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ int n = width & ~MASK; \ diff --git a/source/row_common.cc b/source/row_common.cc index c1266d449..0e57f4c3f 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1011,17 +1011,17 @@ void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { #define VR -102 /* round(-1.596 * 64) */ // Bias values to subtract 16 from Y and 128 from U and V. -#define BB (UB * 128 + YGB) +#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +#define BR (VR * 128 + YGB) // C reference code that mimics the YUV assembly. static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* b, uint8* g, uint8* r) { uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; - *b = Clamp((int32)(-( u * UB) + y1 + BB) >> 6); + *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6); *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6); - *r = Clamp((int32)(-(v * VR ) + y1 + BR) >> 6); + *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6); } // C reference code that mimics the YUV assembly. @@ -1059,17 +1059,17 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { #define VRJ -90 /* round(-1.40200 * 64) */ // Bias values to subtract 16 from Y and 128 from U and V. -#define BBJ (UBJ * 128 + YGBJ) +#define BBJ (UBJ * 128 + YGBJ) #define BGJ (UGJ * 128 + VGJ * 128 + YGBJ) -#define BRJ (VRJ * 128 + YGBJ) +#define BRJ (VRJ * 128 + YGBJ) // C reference code that mimics the YUV assembly. static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v, uint8* b, uint8* g, uint8* r) { uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16; - *b = Clamp((int32)(-( u * UBJ) + y1 + BBJ) >> 6); + *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6); *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6); - *r = Clamp((int32)(-(v * VRJ ) + y1 + BRJ) >> 6); + *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6); } #undef YGJ @@ -2086,22 +2086,6 @@ void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, } } -// Select G channel from ARGB. e.g. GGGGGGGG -void ARGBToBayerGGRow_C(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix) { - // Copy a row of G. - int x; - for (x = 0; x < pix - 1; x += 2) { - dst_bayer[0] = src_argb[1]; - dst_bayer[1] = src_argb[5]; - src_argb += 8; - dst_bayer += 2; - } - if (pix & 1) { - dst_bayer[0] = src_argb[1]; - } -} - // Use first 4 shuffler values to reorder ARGB channels. void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int pix) { diff --git a/source/row_neon.cc b/source/row_neon.cc index 9240cf532..1a72eb903 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1251,25 +1251,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, ); } -// Select G channels from ARGB. e.g. GGGGGGGG -void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /*selector*/, int pix) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop - MEMACCESS(1) - "vst1.8 {d1}, [%1]! \n" // store 8 G's. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int pix) { diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 3498b3ce0..5d015454b 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1259,27 +1259,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, } #endif // HAS_UYVYTOUVROW_NEON -// Select G channels from ARGB. e.g. GGGGGGGG -#ifdef HAS_ARGBTOBAYERGGROW_NEON -void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /*selector*/, int pix) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop - MEMACCESS(1) - "st1 {v1.8b}, [%1], #8 \n" // store 8 G's. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); -} -#endif // HAS_ARGBTOBAYERGGROW_NEON - // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. #ifdef HAS_ARGBSHUFFLEROW_NEON void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, diff --git a/source/row_posix.cc b/source/row_posix.cc index baf94a917..f9f167539 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -1599,8 +1599,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = { "punpcklwd %%xmm2,%%xmm0 \n" \ "punpckhwd %%xmm2,%%xmm1 \n" \ "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n" \ - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" + "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ + "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" // Store 8 BGRA values. Assumes XMM5 is zero. #define STOREBGRA \ @@ -1611,8 +1611,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = { "punpcklwd %%xmm1,%%xmm5 \n" \ "punpckhwd %%xmm1,%%xmm0 \n" \ "movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \ - "movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) " \n" \ - "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n" + "movdqu %%xmm0," MEMACCESS2(0x10, [dst_bgra]) " \n" \ + "lea " MEMLEA(0x20, [dst_bgra]) ", %[dst_bgra] \n" // Store 8 ABGR values. Assumes XMM5 is zero. #define STOREABGR \ @@ -1622,8 +1622,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = { "punpcklwd %%xmm0,%%xmm2 \n" \ "punpckhwd %%xmm0,%%xmm1 \n" \ "movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \ - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) " \n" \ - "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n" + "movdqu %%xmm1," MEMACCESS2(0x10, [dst_abgr]) " \n" \ + "lea " MEMLEA(0x20, [dst_abgr]) ", %[dst_abgr] \n" // Store 8 RGBA values. Assumes XMM5 is zero. #define STORERGBA \ @@ -1634,8 +1634,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = { "punpcklwd %%xmm1,%%xmm5 \n" \ "punpckhwd %%xmm1,%%xmm0 \n" \ "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ - "movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) " \n" \ - "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n" + "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ + "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -5030,37 +5030,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, } #endif // HAS_INTERPOLATEROW_SSE2 -#ifdef HAS_ARGBTOBAYERGGROW_SSE2 -void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrld $0x18,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrld $0x8,%%xmm0 \n" - "psrld $0x8,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); -} -#endif // HAS_ARGBTOBAYERGGROW_SSE2 - #ifdef HAS_ARGBSHUFFLEROW_SSSE3 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, diff --git a/source/row_win.cc b/source/row_win.cc index 246e7d9e1..a62a13254 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -5875,36 +5875,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, } #endif // HAS_INTERPOLATEROW_SSE2 -// Specialized ARGB to Bayer that just isolates G channel. -__declspec(naked) -void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_bayer - // selector - mov ecx, [esp + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x000000ff - psrld xmm5, 24 - - wloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - psrld xmm0, 8 // Move green to bottom. - psrld xmm1, 8 - pand xmm0, xmm5 - pand xmm1, xmm5 - packssdw xmm0, xmm1 - packuswb xmm0, xmm1 - movq qword ptr [edx], xmm0 - lea edx, [edx + 8] - sub ecx, 8 - jg wloop - ret - } -} - // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. __declspec(naked) void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,