diff --git a/README.chromium b/README.chromium index fdd0defde..9b37bafe0 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1475 +Version: 1476 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 410f4f797..5ba49fc52 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -88,6 +88,7 @@ extern "C" { #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 +#define HAS_I422TOARGBMATRIXROW_SSSE3 #define HAS_I422TOBGRAROW_SSSE3 #define HAS_I422TORAWROW_SSSE3 #define HAS_I422TORGB24ROW_SSSE3 @@ -161,6 +162,7 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \ (!defined(__clang__) || defined(__SSSE3__)) #define HAS_I422TOARGBROW_SSSE3 +#define HAS_I422TOARGBMATRIXROW_SSSE3 #endif // GCC >= 4.7.0 required for AVX2. @@ -223,6 +225,7 @@ extern "C" { #define HAS_I400TOARGBROW_AVX2 #define HAS_I422TOABGRROW_AVX2 #define HAS_I422TOARGBROW_AVX2 +#define HAS_I422TOARGBMATRIXROW_AVX2 #define HAS_I422TOBGRAROW_AVX2 #define HAS_I422TORAWROW_AVX2 #define HAS_I422TORGB24ROW_AVX2 @@ -290,6 +293,8 @@ extern "C" { #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON #define HAS_I422TOARGBROW_NEON +// TODO(fbarchard): Implement NEON version +#define HAS_I422TOARGBMATRIXROW_NEON #define HAS_I422TOBGRAROW_NEON #define HAS_I422TORAWROW_NEON #define HAS_I422TORGB24ROW_NEON @@ -414,6 +419,21 @@ typedef uint32 ulvec32[8]; typedef uint8 ulvec8[32]; #endif +// This struct is for Intel color conversion. 
+#if defined(_M_IX86) || defined(_M_X64) || \ + defined(__x86_64__) || defined(__i386__) + +struct YuvConstants { + lvec8 kUVToB; + lvec8 kUVToG; + lvec8 kUVToR; + lvec16 kUVBiasB; + lvec16 kUVBiasG; + lvec16 kUVBiasR; + lvec16 kYToRgb; +}; +#endif + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -509,6 +529,12 @@ void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -962,6 +988,12 @@ void I422ToARGBRow_C(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I411ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1039,6 +1071,12 @@ void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I422ToBGRARow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1069,6 +1107,12 @@ void I422ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I411ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1203,6 +1247,12 @@ void I422ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_Any_AVX2(const uint8* src_y, + const uint8* src_u, + const 
uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I422ToBGRARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1233,6 +1283,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I411ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1463,7 +1519,13 @@ void I422ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, int width); +void I422ToARGBMatrixRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width); void I411ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 9ebb4091e..e03457821 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1475 +#define LIBYUV_VERSION 1476 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index dcd0e4121..8320b2820 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2156,6 +2156,51 @@ void I422ToUYVYRow_C(const uint8* src_y, } } +#if defined(HAS_I422TOARGBMATRIXROW_SSSE3) +extern struct YuvConstants kYuvConstants; +extern struct YuvConstants kYuvJConstants; + +// JPeg color space version of I422ToARGB +void J422ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + int width) { + I422ToARGBMatrixRow_SSSE3(y_buf, u_buf, v_buf, dst_argb, + &kYuvJConstants, width); +} + +void I422ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* 
v_buf, + uint8* dst_argb, + int width) { + I422ToARGBMatrixRow_SSSE3(y_buf, u_buf, v_buf, dst_argb, + &kYuvConstants, width); +} + +#if defined(HAS_I422TOARGBMATRIXROW_AVX2) +// JPeg color space version of I422ToARGB +void J422ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + int width) { + I422ToARGBMatrixRow_AVX2(y_buf, u_buf, v_buf, dst_argb, + &kYuvJConstants, width); +} + +void I422ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + int width) { + I422ToARGBMatrixRow_AVX2(y_buf, u_buf, v_buf, dst_argb, + &kYuvConstants, width); +} +#endif +#endif + // Maximum temporary width for wrappers to process at a time, in pixels. #define MAXTWIDTH 2048 diff --git a/source/row_gcc.cc b/source/row_gcc.cc index f06a9cc46..4a4f2a2d2 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1319,16 +1319,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) -struct YuvConstants { - lvec8 kUVToB; // 0 - lvec8 kUVToG; // 32 - lvec8 kUVToR; // 64 - lvec16 kUVBiasB; // 96 - lvec16 kUVBiasG; // 128 - lvec16 kUVBiasR; // 160 - lvec16 kYToRgb; // 192 -}; - // BT.601 YUV to RGB reference // R = (Y - 16) * 1.164 - V * -1.596 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 @@ -1351,7 +1341,7 @@ struct YuvConstants { #define BR (VR * 128 + YGB) // BT601 constants for YUV to RGB. -static YuvConstants SIMD_ALIGNED(kYuvConstants) = { +YuvConstants SIMD_ALIGNED(kYuvConstants) = { { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 }, { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, @@ -1365,7 +1355,7 @@ static YuvConstants SIMD_ALIGNED(kYuvConstants) = { }; // BT601 constants for NV21 where chroma plane is VU instead of UV. 
-static YuvConstants SIMD_ALIGNED(kYvuConstants) = { +YuvConstants SIMD_ALIGNED(kYvuConstants) = { { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, @@ -1658,11 +1648,12 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, ); } -void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { +void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -1678,33 +1669,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); -} - -void OMITFP J422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB(kYuvConstants) - STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants] + : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1939,14 +1904,15 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, } #endif // HAS_I422TOBGRAROW_AVX2 -#if defined(HAS_I422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBMATRIXROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed 
with 16 Y producing 16 ARGB (64 bytes). -void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { +void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -1974,54 +1940,12 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -#endif // HAS_I422TOARGBROW_AVX2 - -#if defined(HAS_J422TOARGBROW_AVX2) -// 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) - - // Step 3: Weave into ARGB - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA - "vpermq $0xd8,%%ymm2,%%ymm2 \n" - "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels - "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels - - "vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n" - "vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n" - "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // 
%[kYuvConstants] - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); -} -#endif // HAS_J422TOARGBROW_AVX2 +#endif // HAS_I422TOARGBMATRIXROW_AVX2 #if defined(HAS_I422TOABGRROW_AVX2) // 16 pixels diff --git a/source/row_win.cc b/source/row_win.cc index b190e645f..e86f2c464 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -25,16 +25,6 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__))) -struct YuvConstants { - lvec8 kUVToB; - lvec8 kUVToG; - lvec8 kUVToR; - lvec16 kUVBiasB; - lvec16 kUVBiasG; - lvec16 kUVBiasR; - lvec16 kYToRgb; -}; - #define KUVTOB 0 #define KUVTOG 32 #define KUVTOR 64 @@ -65,7 +55,7 @@ struct YuvConstants { #define BR (VR * 128 + YGB) // BT601 constants for YUV to RGB. -static YuvConstants SIMD_ALIGNED(kYuvConstants) = { +YuvConstants SIMD_ALIGNED(kYuvConstants) = { { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 }, { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, @@ -79,7 +69,7 @@ static YuvConstants SIMD_ALIGNED(kYuvConstants) = { }; // BT601 constants for NV21 where chroma plane is VU instead of UV. -static YuvConstants SIMD_ALIGNED(kYvuConstants) = { +YuvConstants SIMD_ALIGNED(kYvuConstants) = { { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, @@ -124,7 +114,7 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = { #define BRJ (VRJ * 128 + YGBJ) // JPEG constants for YUV to RGB. 
-static YuvConstants SIMD_ALIGNED(kYuvJConstants) = { +YuvConstants SIMD_ALIGNED(kYuvJConstants) = { { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 }, { UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, @@ -155,12 +145,13 @@ static YuvConstants SIMD_ALIGNED(kYuvJConstants) = { // 64 bit #if defined(_M_X64) -#if defined(HAS_I422TOARGBROW_SSSE3) -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { +#if defined(HAS_I422TOARGBMATRIXROW_SSSE3) +void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width) { __m128i xmm0, xmm1, xmm2, xmm3; const __m128i xmm5 = _mm_set1_epi8(-1); const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; @@ -172,15 +163,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); xmm1 = _mm_loadu_si128(&xmm0); xmm2 = _mm_loadu_si128(&xmm0); - xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kYuvConstants.kUVToB); - xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kYuvConstants.kUVToG); - xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kYuvConstants.kUVToR); - xmm0 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasB, xmm0); - xmm1 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasG, xmm1); - xmm2 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasR, xmm2); + xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); + xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); + xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); + xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); + xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); + xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); xmm3 = _mm_loadl_epi64((__m128i*)y_buf); xmm3 = _mm_unpacklo_epi8(xmm3, xmm3); - xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)kYuvConstants.kYToRgb); + xmm3 = 
_mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb); xmm0 = _mm_adds_epi16(xmm0, xmm3); xmm1 = _mm_adds_epi16(xmm1, xmm3); xmm2 = _mm_adds_epi16(xmm2, xmm3); @@ -2012,77 +2003,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, __asm lea edx, [edx + 64] \ } -#ifdef HAS_I422TOARGBROW_AVX2 +#ifdef HAS_I422TOARGBMATRIXROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) -void I422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { +void I422ToARGBMatrixRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width + push ebp + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U + mov edi, [esp + 12 + 12] // V + mov edx, [esp + 12 + 16] // argb + mov ebp, [esp + 12 + 20] // YuvConstants + mov ecx, [esp + 12 + 24] // width sub edi, esi vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(ebp) STOREARGB_AVX2 sub ecx, 16 jg convertloop + pop ebp pop edi pop esi vzeroupper ret } } -#endif // HAS_I422TOARGBROW_AVX2 - -#ifdef HAS_J422TOARGBROW_AVX2 -// 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
-__declspec(naked) -void J422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha - - convertloop: - READYUV422_AVX2 - YUVTORGB_AVX2(kYuvJConstants) - STOREARGB_AVX2 - - sub ecx, 16 - jg convertloop - - pop edi - pop esi - vzeroupper - ret - } -} -#endif // HAS_J422TOARGBROW_AVX2 +#endif // HAS_I422TOARGBMATRIXROW_AVX2 #ifdef HAS_I444TOARGBROW_AVX2 // 16 pixels @@ -2691,11 +2650,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { +void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width) { __asm { push esi push edi @@ -2704,8 +2664,9 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb - mov ecx, [esp + 12 + 20] // width - lea ebp, kYuvConstants + mov ebp, [esp + 12 + 20] // YuvConstants + mov ecx, [esp + 12 + 24] // width + sub edi, esi pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha @@ -2724,40 +2685,6 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, } } -// 8 pixels. -// JPeg color space version of I422ToARGB -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
-__declspec(naked) -void J422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - - convertloop: - READYUV422 - YUVTORGB(kYuvJConstants) - STOREARGB - - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - // 8 pixels. // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // Similar to I420 but duplicate UV once more.