mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 09:16:48 +08:00
Port I411ToARGBRow to AVX2.
An SSSE3 version already exists, and an AVX2 version is available for Visual C. This change ports the function to AVX2 for gcc, completing the AVX2 ports of all YUV to RGB functions on gcc. TBR=harryjin@google.com BUG=libyuv:555 Review URL: https://codereview.chromium.org/1687253002 .
This commit is contained in:
parent
0e554b18fe
commit
cc33dc68c7
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1575
|
||||
Version: 1576
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -206,6 +206,7 @@ extern "C" {
|
||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#endif
|
||||
#define HAS_I411TOARGBROW_AVX2
|
||||
#define HAS_I422TOARGB1555ROW_AVX2
|
||||
#define HAS_I422TOARGB4444ROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
@ -248,7 +249,6 @@ extern "C" {
|
||||
#define HAS_ARGBTOARGB1555ROW_AVX2
|
||||
#define HAS_ARGBTOARGB4444ROW_AVX2
|
||||
#define HAS_ARGBTORGB565ROW_AVX2
|
||||
#define HAS_I411TOARGBROW_AVX2
|
||||
#define HAS_J400TOARGBROW_AVX2
|
||||
#define HAS_RGB565TOARGBROW_AVX2
|
||||
#endif
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1575
|
||||
#define LIBYUV_VERSION 1576
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
|
||||
|
||||
// Read 8 UV from 411
|
||||
// Read 8 UV from 444
|
||||
#define READYUV444 \
|
||||
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
||||
@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
"vpermq $0xd8,%%ymm5,%%ymm5 \n" \
|
||||
"lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
|
||||
|
||||
// Read 4 UV from 411, upsample to 16 UV.
//
// Inline-asm fragment for the AVX2 YUV-to-RGB row loops. Step by step:
//   1. vmovd loads 4 bytes from [u_buf] into xmm0.
//   2. MEMOPREG(vmovd, ...) loads into xmm1 using u_buf as base and v_buf as
//      index with scale 1. NOTE(review): MEMOPREG is defined elsewhere;
//      presumably this reads 4 V bytes at u_buf + v_buf, which requires the
//      caller to have turned v_buf into an offset from u_buf - confirm.
//   3. lea advances u_buf by 4 bytes.
//   4. vpunpcklbw interleaves the low bytes of ymm0 and ymm1 (U,V pairs).
//   5. vpunpcklwd duplicates each 16-bit UV pair (2 copies of each).
//   6. vpermq $0xd8 reorders the 64-bit quadwords as 0,2,1,3 so the next
//      per-lane unpack sees the right data in each 128-bit lane.
//   7. vpunpckldq duplicates each 32-bit group, giving 4 copies of each of
//      the 4 UV pairs (16 UV pairs) in ymm0.
//   8. vmovdqu loads 16 bytes from [y_buf] into xmm4; vpermq $0xd8 followed by
//      vpunpcklbw of ymm4 with itself duplicates every Y byte into a byte
//      pair in ymm4.
//   9. lea advances y_buf by 16 bytes.
// Registers written: ymm0 (UV), ymm1 (scratch), ymm4 (Y).
|
||||
#define READYUV411_AVX2 \
|
||||
"vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||
MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
||||
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
|
||||
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
|
||||
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
|
||||
"vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \
|
||||
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||
"vpermq $0xd8,%%ymm4,%%ymm4 \n" \
|
||||
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
|
||||
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 8 UV from NV12, upsample to 16 UV.
|
||||
#define READNV12_AVX2 \
|
||||
"vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
|
||||
@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV444_AVX2
|
||||
@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I444TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
//
// Converts one row of I411 (Y plane plus U and V planes at quarter horizontal
// resolution) to ARGB using GCC inline assembly with AVX2 instructions.
//
//   y_buf        - source Y row; read 16 bytes per loop iteration.
//   u_buf, v_buf - source U and V rows; read 4 bytes each per iteration.
//   dst_argb     - destination row. NOTE(review): the store and pointer
//                  advance happen inside STOREARGB_AVX2, which is defined
//                  elsewhere - confirm it writes 64 bytes per iteration.
//   yuvconstants - conversion constants consumed by YUVTORGB_SETUP_AVX2 and
//                  YUVTORGB_AVX2 (both defined elsewhere).
//   width        - number of pixels; decremented by 16 per iteration.
//
// The loop always processes a full 16 pixels, including the last iteration.
// NOTE(review): presumably callers pass a width that is a multiple of 16 or
// handle the remainder separately - confirm against the callers.
|
||||
void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
// Turn v_buf into the byte offset of the V row relative to u_buf, so that
// advancing u_buf alone (done in READYUV411_AVX2) steps both planes.
"sub %[u_buf],%[v_buf] \n"
|
||||
// Set every bit of ymm5. NOTE(review): presumably the opaque alpha channel
// used by STOREARGB_AVX2 - confirm.
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
// Loop body: load and upsample 16 pixels, convert, store.
READYUV411_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREARGB_AVX2
|
||||
// 16 pixels consumed; repeat while the remaining width is still positive.
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
// Clear the upper halves of the ymm registers before returning to
// non-AVX code.
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I411TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user