Port I411ToARGBRow to AVX2.

An SSSE3 version already exists, and an AVX2 version is available for
Visual C.  This ports the function to AVX2 completing the AVX2 ports of
all YUV to RGB functions for AVX2 on gcc.

TBR=harryjin@google.com
BUG=libyuv:555

Review URL: https://codereview.chromium.org/1687253002 .
This commit is contained in:
Frank Barchard 2016-02-12 10:26:10 -08:00
parent 0e554b18fe
commit cc33dc68c7
4 changed files with 53 additions and 6 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1575
Version: 1576
License: BSD
License File: LICENSE

View File

@ -206,6 +206,7 @@ extern "C" {
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2
#endif
#define HAS_I411TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2
@ -248,7 +249,6 @@ extern "C" {
#define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2
#define HAS_I411TOARGBROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#endif

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1575
#define LIBYUV_VERSION 1576
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
// Read 8 UV from 411
// Read 8 UV from 444
#define READYUV444 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vpermq $0xd8,%%ymm5,%%ymm5 \n" \
"lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
// Read 4 UV from 411, upsample to 16 UV.
#define READYUV411_AVX2 \
"vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
"vpermq $0xd8,%%ymm4,%%ymm4 \n" \
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
// Read 8 UV from NV12, upsample to 16 UV.
#define READNV12_AVX2 \
"vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV444_AVX2
@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
}
#endif // HAS_I444TOARGBROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV411_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I411TOARGBROW_AVX2
#if defined(HAS_I422TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2