diff --git a/include/libyuv/row.h b/include/libyuv/row.h index ca3c3d01b..d5e7eacdc 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -117,7 +117,6 @@ extern "C" { #define HAS_I422TORGBAROW_SSSE3 #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 -#define HAS_I444TOARGBROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_J422TOABGRROW_SSSE3 @@ -150,6 +149,10 @@ extern "C" { #define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOYROW_SSE2 +#define HAS_I444TOARGBROW_SSSE3 +#define HAS_I444TOABGRROW_SSSE3 +#define HAS_I444TOARGBMATRIXROW_SSSE3 +#define HAS_I444TOABGRMATRIXROW_SSSE3 // Effects: #define HAS_ARGBADDROW_SSE2 @@ -192,15 +195,6 @@ extern "C" { #define HAS_I422TOABGRMATRIXROW_SSSE3 #endif -// The following are available for Visual C and clangcl 32 bit: -// TODO(fbarchard): Port to gcc. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ - (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) -#define HAS_I444TOABGRROW_SSSE3 -#define HAS_I444TOARGBMATRIXROW_SSSE3 -#define HAS_I444TOABGRMATRIXROW_SSSE3 -#endif - // The following are available for AVX2 Visual C and clangcl 32 bit: // TODO(fbarchard): Port to gcc. 
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ diff --git a/source/row_gcc.cc b/source/row_gcc.cc index a53520c66..52046645e 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1584,18 +1584,19 @@ YuvConstants SIMD_ALIGNED(kYuvHConstants) = { "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" -void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { +void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* YuvConstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV444 - YUVTORGB(kYuvConstants) + YUVTORGB(YuvConstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1604,7 +1605,34 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [YuvConstants]"r"(YuvConstants) // %[YuvConstants] + : "memory", "cc", NACL_R14 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" + ); +} + +void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_abgr, + struct YuvConstants* YuvConstants, + int width) { + asm volatile ( + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN + "1: \n" + READYUV444 + YUVTORGB(YuvConstants) + STOREABGR + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] + [width]"+rm"(width) // %[width] + : [YuvConstants]"r"(YuvConstants) // %[YuvConstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1711,7 +1739,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, 
LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(YuvConstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1720,7 +1748,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [YuvConstants]"r"(YuvConstants) // %[YuvConstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" );