From 3b4f5eb7b86b93003c1c1bbec1bc9d4d06dda663 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Tue, 17 Mar 2015 00:54:50 +0000 Subject: [PATCH] Port J422 colorspace to GCC BUG=414 TESTED=try bots R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/43809004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1334 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 5 +- include/libyuv/version.h | 2 +- source/row_posix.cc | 166 +++++++++++++++++++++++++++++++++++---- source/row_win.cc | 21 +++-- 5 files changed, 164 insertions(+), 32 deletions(-) diff --git a/README.chromium b/README.chromium index c74572cb6..4985117e8 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1333 +Version: 1334 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 4d28f0f13..f88f39f82 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -124,6 +124,7 @@ extern "C" { #define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOYROW_SSE2 +#define HAS_J422TOARGBROW_SSSE3 // Effects: #define HAS_ARGBADDROW_SSE2 @@ -203,8 +204,6 @@ extern "C" { // TODO(fbarchard): Port to Neon #define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565DITHERROW_AVX2 -#define HAS_J422TOARGBROW_SSSE3 -#define HAS_J422TOARGBROW_AVX2 #endif // The following are available on all x86 platforms, but @@ -232,6 +231,7 @@ extern "C" { #define HAS_YUY2TOUV422ROW_AVX2 #define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOYROW_AVX2 +#define HAS_J422TOARGBROW_AVX2 // The following require HAS_I422TOARGBROW_AVX2 #if defined(HAS_I422TOARGBROW_AVX2) @@ -247,7 +247,6 @@ extern "C" { #define HAS_ARGBUNATTENUATEROW_AVX2 #endif - // The following are Yasm x86 only: // TODO(fbarchard): Port AVX2 to inline. 
#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 2315e2ed8..eb1c1ffae 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1333 +#define LIBYUV_VERSION 1334 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index 4b6614ac1..c42885696 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -1414,22 +1414,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) -// YUV to RGB conversion constants. -// Y contribution to R,G,B. Scale and bias. -#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ -#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ - -// U and V contributions to R,G,B. -#define UB -128 /* -min(128, round(2.018 * 64)) */ -#define UG 25 /* -round(-0.391 * 64) */ -#define VG 52 /* -round(-0.813 * 64) */ -#define VR -102 /* -round(1.596 * 64) */ - -// Bias values to subtract 16 from Y and 128 from U and V. -#define BB (UB * 128 - YGB) -#define BG (UG * 128 + VG * 128 - YGB) -#define BR (VR * 128 - YGB) - struct YuvConstants { lvec8 kUVToB; // 0 lvec8 kUVToG; // 32 @@ -1440,6 +1424,27 @@ struct YuvConstants { lvec16 kYToRgb; // 192 }; +// BT.601 YUV to RGB reference +// R = (Y - 16) * 1.164 - V * -1.596 +// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 +// B = (Y - 16) * 1.164 - U * -2.018 + +// Y contribution to R,G,B. Scale and bias. +// TODO(fbarchard): Consider moving constants into a common header. +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ + +// U and V contributions to R,G,B. 
+#define UB -128 /* max(-128, round(-2.018 * 64)) */ +#define UG 25 /* round(0.391 * 64) */ +#define VG 52 /* round(0.813 * 64) */ +#define VR -102 /* round(-1.596 * 64) */ + +// Bias values to subtract 16 from Y and 128 from U and V. +#define BB (UB * 128 + YGB) +#define BG (UG * 128 + VG * 128 + YGB) +#define BR (VR * 128 + YGB) + // BT601 constants for YUV to RGB. static YuvConstants SIMD_ALIGNED(kYuvConstants) = { { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, @@ -1468,6 +1473,67 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = { { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } }; +#undef YG +#undef YGB +#undef UB +#undef UG +#undef VG +#undef VR +#undef BB +#undef BG +#undef BR + +// JPEG YUV to RGB reference +// * R = Y - V * -1.40200 +// * G = Y - U * 0.34414 - V * 0.71414 +// * B = Y - U * -1.77200 + +// Y contribution to R,G,B. Scale and bias. +// TODO(fbarchard): Consider moving constants into a common header. +#define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ +#define YGBJ 32 /* 64 / 2 */ + +// U and V contributions to R,G,B. +#define UBJ -113 /* round(-1.77200 * 64) */ +#define UGJ 22 /* round(0.34414 * 64) */ +#define VGJ 46 /* round(0.71414 * 64) */ +#define VRJ -90 /* round(-1.40200 * 64) */ + +// Bias values to round Y and subtract 128 from U and V. +#define BBJ (UBJ * 128 + YGBJ) +#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ) +#define BRJ (VRJ * 128 + YGBJ) + +// JPEG constants for YUV to RGB. 
+YuvConstants SIMD_ALIGNED(kYuvJConstants) = { + { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, + UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 }, + { UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, + UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, + UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, + UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ }, + { 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, + 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ }, + { BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, + BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ }, + { BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, + BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ }, + { BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, + BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ }, + { YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, + YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ } +}; + +#undef YGJ +#undef YGBJ +#undef UBJ +#undef UGJ +#undef VGJ +#undef VRJ +#undef BBJ +#undef BGJ +#undef BRJ + // Read 8 UV from 411 #define READYUV444 \ "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ @@ -1713,6 +1779,32 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, ); } +void OMITFP J422ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + int width) { + asm volatile ( + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN + "1: \n" + READYUV422 + YUVTORGB(kYuvConstants) + STOREARGB + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants] + : "memory", "cc", NACL_R14 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" + ); +} + void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -1984,6 +2076,48 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I422TOARGBROW_AVX2 +#if 
defined(HAS_J422TOARGBROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). +void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + int width) { + asm volatile ( + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN + "1: \n" + READYUV422_AVX2 + YUVTORGB_AVX2(kYuvConstants) + + // Step 3: Weave into ARGB + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA + "vpermq $0xd8,%%ymm2,%%ymm2 \n" + "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels + "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels + + "vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n" + "vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n" + "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants] + : "memory", "cc", NACL_R14 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" + ); +} +#endif // HAS_J422TOARGBROW_AVX2 + #if defined(HAS_I422TOABGRROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 
diff --git a/source/row_win.cc b/source/row_win.cc index c531c3855..4676aaec4 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -24,6 +24,16 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ (defined(_M_IX86) || defined(_M_X64)) +struct YuvConstants { + lvec8 kUVToB; // 0 + lvec8 kUVToG; // 32 + lvec8 kUVToR; // 64 + lvec16 kUVBiasB; // 96 + lvec16 kUVBiasG; // 128 + lvec16 kUVBiasR; // 160 + lvec16 kYToRgb; // 192 +}; + // BT.601 YUV to RGB reference // R = (Y - 16) * 1.164 - V * -1.596 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 @@ -45,16 +55,6 @@ extern "C" { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) -struct YuvConstants { - lvec8 kUVToB; // 0 - lvec8 kUVToG; // 32 - lvec8 kUVToR; // 64 - lvec16 kUVBiasB; // 96 - lvec16 kUVBiasG; // 128 - lvec16 kUVBiasR; // 160 - lvec16 kYToRgb; // 192 -}; - // BT601 constants for YUV to RGB. static YuvConstants SIMD_ALIGNED(kYuvConstants) = { { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, @@ -1894,7 +1894,6 @@ void I422ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I422TOARGBROW_AVX2 - #ifdef HAS_J422TOARGBROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).