From 1019e4537fc1bfc6ee505cd1c628b645c7e966b7 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Fri, 13 Nov 2015 18:31:22 -0800 Subject: [PATCH] port I444ToARGB avx2 code from Visual C to GCC. SSSE3 Note: Google Test filter = *I444ToARGB* [==========] Running 8 tests from 1 test case. [----------] Global test environment set-up. [----------] 8 tests from LibYUVConvertTest [ RUN ] LibYUVConvertTest.I444ToARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_Any (435 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_Unaligned (418 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_Invert (417 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_Opt (411 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Any (419 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned (432 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Invert (435 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Opt (421 ms) [----------] 8 tests from LibYUVConvertTest (3389 ms total) AVX2 Note: Google Test filter = *I444ToARGB* [==========] Running 8 tests from 1 test case. [----------] Global test environment set-up. [----------] 8 tests from LibYUVConvertTest [ RUN ] LibYUVConvertTest.I444ToARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_Any (340 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_Unaligned (325 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_Invert (316 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_Opt (316 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Any (315 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned (341 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Invert (331 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Opt (329 ms) [----------] 8 tests from LibYUVConvertTest (2615 ms total) TBR=harryjin@google.com BUG=libyuv:492 Review URL: https://codereview.chromium.org/1445893002 . --- include/libyuv/row.h | 2 +- source/row_gcc.cc | 48 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 5de839df1..7213fe7ac 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -202,6 +202,7 @@ extern "C" { // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_AVX2 #endif +#define HAS_I444TOARGBROW_AVX2 #define HAS_I422TOARGBROW_AVX2 #define HAS_I422TORGB24ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 @@ -242,7 +243,6 @@ extern "C" { #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TORGB565ROW_AVX2 -#define HAS_I444TOARGBROW_AVX2 #define HAS_J400TOARGBROW_AVX2 #define HAS_NV12TORGB565ROW_AVX2 #define HAS_RGB565TOARGBROW_AVX2 diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 9940cba12..348935405 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1947,6 +1947,19 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, #endif // HAS_I422TOARGBROW_SSSE3 +// Read 16 UV from 444 +#define READYUV444_AVX2 \ + "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \ + MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \ + "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" + // Read 8 UV from 422, upsample to 16 UV. #define READYUV422_AVX2 \ "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ @@ -2079,6 +2092,39 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" +#ifdef HAS_I444TOARGBROW_AVX2 +// 16 pixels +// 16 UV values with 16 Y producing 16 ARGB (64 bytes). +void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN + "1: \n" + READYUV444_AVX2 + YUVTORGB_AVX2(yuvconstants) + STOREARGB_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I444TOARGBROW_AVX2 + #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). @@ -2091,7 +2137,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2