diff --git a/README.chromium b/README.chromium index fd770ef02..94b23a349 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1575 +Version: 1576 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 64190dd96..a5294647c 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -206,6 +206,7 @@ extern "C" { // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_AVX2 #endif +#define HAS_I411TOARGBROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -248,7 +249,6 @@ extern "C" { #define HAS_ARGBTOARGB1555ROW_AVX2 #define HAS_ARGBTOARGB4444ROW_AVX2 #define HAS_ARGBTORGB565ROW_AVX2 -#define HAS_I411TOARGBROW_AVX2 #define HAS_J400TOARGBROW_AVX2 #define HAS_RGB565TOARGBROW_AVX2 #endif diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 464199310..1f7f524cf 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1575 +#define LIBYUV_VERSION 1576 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 1ff1dc2f6..6fab45076 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) -// Read 8 UV from 411 +// Read 8 UV from 444 #define READYUV444 \ "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ @@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" +// Read 4 UV from 411, upsample to 16 UV. +#define READYUV411_AVX2 \ + "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ + MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \ + "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" + // Read 8 UV from NV12, upsample to 16 UV. #define READNV12_AVX2 \ "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ @@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV444_AVX2 @@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I444TOARGBROW_AVX2 +#ifdef HAS_I411TOARGBROW_AVX2 +// 16 pixels +// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). +void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN + "1: \n" + READYUV411_AVX2 + YUVTORGB_AVX2(yuvconstants) + STOREARGB_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I411TOARGBROW_AVX2 + #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). @@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2