From 431cb3667a346136189952ff93974f4ac9e79d78 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 4 Nov 2015 16:16:18 -0800 Subject: [PATCH] YUV to RGB for x64 use registers instead of memory. On Arm the YUV to RGB conversions move constants into registers. This change does the same for 64 bit intel builds where additional registers are available. The AVX2 version saves 3 instructions because the 2nd argument needs to be a register, so a vmovdqu was avoided. x64 builds using memory: AVX2 I420ToARGB_Opt (3059 ms) SSSE3 I420ToARGB_Opt (3959 ms) Now using registers AVX2 I420ToARGB_Opt (2906 ms) SSSE3 I420ToARGB_Opt (3928 ms) TBR=harryjin@google.com BUG=libyuv:520 Review URL: https://codereview.chromium.org/1407353010 . --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row_gcc.cc | 140 +++++++++++++++++++++++++++++++-------- 3 files changed, 116 insertions(+), 28 deletions(-) diff --git a/README.chromium b/README.chromium index c58aadebb..66b60a0f9 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1531 +Version: 1532 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 03b60c46b..610c8b81b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1531 +#define LIBYUV_VERSION 1532 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 105fa987b..9940cba12 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1564,6 +1564,44 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" +#if defined(__x86_64__) +#define YUVTORGB_SETUP(yuvconstants) \ + "movdqa " MEMACCESS([yuvconstants]) ",%%xmm8 \n" \ + "movdqa " MEMACCESS2(32, [yuvconstants]) ",%%xmm9
\n" \ + "movdqa " MEMACCESS2(64, [yuvconstants]) ",%%xmm10 \n" \ + "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm11 \n" \ + "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm12 \n" \ + "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm13 \n" \ + "movdqa " MEMACCESS2(192, [yuvconstants]) ",%%xmm14 \n" +// Convert 8 pixels: 8 UV and 8 Y +#define YUVTORGB(yuvconstants) \ + "movdqa %%xmm0,%%xmm1 \n" \ + "movdqa %%xmm0,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm3 \n" \ + "movdqa %%xmm11,%%xmm0 \n" \ + "pmaddubsw %%xmm8,%%xmm1 \n" \ + "psubw %%xmm1,%%xmm0 \n" \ + "movdqa %%xmm12,%%xmm1 \n" \ + "pmaddubsw %%xmm9,%%xmm2 \n" \ + "psubw %%xmm2,%%xmm1 \n" \ + "movdqa %%xmm13,%%xmm2 \n" \ + "pmaddubsw %%xmm10,%%xmm3 \n" \ + "psubw %%xmm3,%%xmm2 \n" \ + "pmulhuw %%xmm14,%%xmm4 \n" \ + "paddsw %%xmm4,%%xmm0 \n" \ + "paddsw %%xmm4,%%xmm1 \n" \ + "paddsw %%xmm4,%%xmm2 \n" \ + "psraw $0x6,%%xmm0 \n" \ + "psraw $0x6,%%xmm1 \n" \ + "psraw $0x6,%%xmm2 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "packuswb %%xmm1,%%xmm1 \n" \ + "packuswb %%xmm2,%%xmm2 \n" +#define YUVTORGB_REGS \ + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + +#else +#define YUVTORGB_SETUP(yuvconstants) // Convert 8 pixels: 8 UV and 8 Y #define YUVTORGB(yuvconstants) \ "movdqa %%xmm0,%%xmm1 \n" \ @@ -1588,6 +1626,8 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "packuswb %%xmm0,%%xmm0 \n" \ "packuswb %%xmm1,%%xmm1 \n" \ "packuswb %%xmm2,%%xmm2 \n" +#define YUVTORGB_REGS +#endif // Store 8 ARGB values. 
#define STOREARGB \ @@ -1619,6 +1659,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN @@ -1634,7 +1675,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1646,6 +1687,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" "sub %[u_buf],%[v_buf] \n" @@ -1678,7 +1720,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" ); } @@ -1690,6 +1732,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN @@ -1705,7 +1748,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1719,6 +1762,7 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "sub 
%[u_buf],%[v_buf] \n" LABELALIGN "1: \n" @@ -1738,7 +1782,7 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1751,6 +1795,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN @@ -1766,7 +1811,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1777,6 +1822,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" @@ -1790,8 +1836,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS // Does not use r14. + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1801,6 +1847,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" @@ -1815,8 +1862,8 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - // Does not use r14. 
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS // Does not use r14. + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1825,6 +1872,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" @@ -1839,8 +1887,8 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS // Does not use r14. + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1849,6 +1897,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" @@ -1863,8 +1912,8 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS // Does not use r14. 
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1875,6 +1924,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN @@ -1890,7 +1940,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1964,7 +2014,36 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ "lea " MEMLEA(0x20, [uyvy_buf]) ",%[uyvy_buf] \n" -// Convert 16 pixels: 16 UV and 16 Y. +#if defined(__x86_64__) +#define YUVTORGB_SETUP_AVX2(yuvconstants) \ + "vmovdqa " MEMACCESS([yuvconstants]) ",%%ymm8 \n" \ + "vmovdqa " MEMACCESS2(32, [yuvconstants]) ",%%ymm9 \n" \ + "vmovdqa " MEMACCESS2(64, [yuvconstants]) ",%%ymm10 \n" \ + "vmovdqa " MEMACCESS2(96, [yuvconstants]) ",%%ymm11 \n" \ + "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \ + "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \ + "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n" +#define YUVTORGB_AVX2(yuvconstants) \ + "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \ + "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \ + "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \ + "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" \ + "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" \ + "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" \ + "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \ + "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ + "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ + "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \ + "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ + "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ + "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" +#define YUVTORGB_REGS_AVX2 \ + 
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", +#else// Convert 16 pixels: 16 UV and 16 Y. +#define YUVTORGB_SETUP_AVX2(yuvconstants) #define YUVTORGB_AVX2(yuvconstants) \ "vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \ "vpmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%ymm0,%%ymm1 \n" \ @@ -1985,6 +2064,8 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" +#define YUVTORGB_REGS_AVX2 +#endif // Store 16 ARGB values. #define STOREARGB_AVX2 \ @@ -2008,6 +2089,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN @@ -2024,7 +2106,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2041,6 +2123,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" LABELALIGN "1: \n" @@ -2061,7 +2144,7 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2077,6 +2160,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN @@ -2103,7 +2187,7 @@ void OMITFP 
I422ToRGBARow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2118,6 +2202,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" @@ -2132,8 +2217,8 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_NV12TOARGBROW_AVX2 @@ -2147,6 +2232,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" @@ -2162,8 +2248,8 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14.
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_NV21TOARGBROW_AVX2 @@ -2176,6 +2262,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" @@ -2191,8 +2278,8 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_YUY2TOARGBROW_AVX2 @@ -2205,6 +2292,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" @@ -2220,8 +2308,8 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_UYVYTOARGBROW_AVX2