From cc33dc68c7ce7e20e820b5519989ea18a73f5113 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Fri, 12 Feb 2016 10:26:10 -0800 Subject: [PATCH] Port I411ToARGBRow to AVX2. An SSSE3 version already exists, and an AVX2 version is available for Visual C. This ports the function to AVX2 for gcc, completing the AVX2 ports of all YUV to RGB functions on gcc. TBR=harryjin@google.com BUG=libyuv:555 Review URL: https://codereview.chromium.org/1687253002 . --- README.chromium | 2 +- include/libyuv/row.h | 2 +- include/libyuv/version.h | 2 +- source/row_gcc.cc | 53 +++++++++++++++++++++++++++++++++++++--- 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/README.chromium b/README.chromium index fd770ef02..94b23a349 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1575 +Version: 1576 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 64190dd96..a5294647c 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -206,6 +206,7 @@ extern "C" { // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_AVX2 #endif +#define HAS_I411TOARGBROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -248,7 +249,6 @@ extern "C" { #define HAS_ARGBTOARGB1555ROW_AVX2 #define HAS_ARGBTOARGB4444ROW_AVX2 #define HAS_ARGBTORGB565ROW_AVX2 -#define HAS_I411TOARGBROW_AVX2 #define HAS_J400TOARGBROW_AVX2 #define HAS_RGB565TOARGBROW_AVX2 #endif diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 464199310..1f7f524cf 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1575 +#define LIBYUV_VERSION 1576 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 1ff1dc2f6..6fab45076
100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) -// Read 8 UV from 411 +// Read 8 UV from 444 #define READYUV444 \ "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ @@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" +// Read 4 UV from 411, upsample to 16 UV. +#define READYUV411_AVX2 \ + "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ + MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \ + "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" + // Read 8 UV from NV12, upsample to 16 UV. #define READNV12_AVX2 \ "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ @@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV444_AVX2 @@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I444TOARGBROW_AVX2 +#ifdef HAS_I411TOARGBROW_AVX2 +// 16 pixels +// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
+void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN + "1: \n" + READYUV411_AVX2 + YUVTORGB_AVX2(yuvconstants) + STOREARGB_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I411TOARGBROW_AVX2 + #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). @@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2