mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Port I444ToARGB AVX2 code from Visual C to GCC.
SSSE3 Note: Google Test filter = *I444ToARGB* [==========] Running 8 tests from 1 test case. [----------] Global test environment set-up. [----------] 8 tests from LibYUVConvertTest [ RUN ] LibYUVConvertTest.I444ToARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_Any (435 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_Unaligned (418 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_Invert (417 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_Opt (411 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Any (419 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned (432 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Invert (435 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Opt (421 ms) [----------] 8 tests from LibYUVConvertTest (3389 ms total) AVX2 Note: Google Test filter = *I444ToARGB* [==========] Running 8 tests from 1 test case. [----------] Global test environment set-up. 
[----------] 8 tests from LibYUVConvertTest [ RUN ] LibYUVConvertTest.I444ToARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_Any (340 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_Unaligned (325 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_Invert (316 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_Opt (316 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Any [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Any (315 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Unaligned (341 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Invert [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Invert (331 ms) [ RUN ] LibYUVConvertTest.I444ToARGB_ARGB_Opt [ OK ] LibYUVConvertTest.I444ToARGB_ARGB_Opt (329 ms) [----------] 8 tests from LibYUVConvertTest (2615 ms total) TBR=harryjin@google.com BUG=libyuv:492 Review URL: https://codereview.chromium.org/1445893002 .
This commit is contained in:
parent
60adcbaf32
commit
1019e4537f
@ -202,6 +202,7 @@ extern "C" {
|
|||||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
#define HAS_I444TOARGBROW_AVX2
|
||||||
#define HAS_I422TOARGBROW_AVX2
|
#define HAS_I422TOARGBROW_AVX2
|
||||||
#define HAS_I422TORGB24ROW_AVX2
|
#define HAS_I422TORGB24ROW_AVX2
|
||||||
#define HAS_I422TORGBAROW_AVX2
|
#define HAS_I422TORGBAROW_AVX2
|
||||||
@ -242,7 +243,6 @@ extern "C" {
|
|||||||
#define HAS_I422TOARGB1555ROW_AVX2
|
#define HAS_I422TOARGB1555ROW_AVX2
|
||||||
#define HAS_I422TOARGB4444ROW_AVX2
|
#define HAS_I422TOARGB4444ROW_AVX2
|
||||||
#define HAS_I422TORGB565ROW_AVX2
|
#define HAS_I422TORGB565ROW_AVX2
|
||||||
#define HAS_I444TOARGBROW_AVX2
|
|
||||||
#define HAS_J400TOARGBROW_AVX2
|
#define HAS_J400TOARGBROW_AVX2
|
||||||
#define HAS_NV12TORGB565ROW_AVX2
|
#define HAS_NV12TORGB565ROW_AVX2
|
||||||
#define HAS_RGB565TOARGBROW_AVX2
|
#define HAS_RGB565TOARGBROW_AVX2
|
||||||
|
|||||||
@ -1947,6 +1947,19 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
#endif // HAS_I422TOARGBROW_SSSE3
|
#endif // HAS_I422TOARGBROW_SSSE3
|
||||||
|
|
||||||
|
// Read 16 UV from 444
// Inline-asm fragment that loads 16 U bytes, 16 V bytes and 16 Y bytes.
//  - xmm0 <- 16 bytes at [u_buf]; xmm1 <- 16 bytes via MEMOPREG using u_buf
//    and v_buf (presumably [u_buf + v_buf], i.e. v_buf is expected to already
//    hold the V-minus-U offset -- confirm against the MEMOPREG definition).
//  - vpermq $0xd8 reorders the 64-bit quadwords as 0,2,1,3 so each 128-bit
//    lane holds 8 source bytes in its low half; vpunpcklbw then interleaves
//    per lane, leaving 16 U/V byte pairs in ymm0.
//  - The 16 Y bytes get the same shuffle and are unpacked against themselves,
//    so ymm4 holds each Y byte duplicated into both halves of a 16-bit word.
//  - u_buf and y_buf are each advanced by 0x10 bytes; ymm1 is scratch.
|
||||||
|
#define READYUV444_AVX2 \
|
||||||
|
"vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||||
|
MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
||||||
|
"lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
|
||||||
|
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
|
||||||
|
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
|
||||||
|
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||||
|
"vpermq $0xd8,%%ymm4,%%ymm4 \n" \
|
||||||
|
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
|
||||||
|
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
|
||||||
|
|
||||||
// Read 8 UV from 422, upsample to 16 UV.
|
// Read 8 UV from 422, upsample to 16 UV.
|
||||||
#define READYUV422_AVX2 \
|
#define READYUV422_AVX2 \
|
||||||
"vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
"vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||||
@ -2079,6 +2092,39 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
|||||||
"vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \
|
"vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \
|
||||||
"lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n"
|
"lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n"
|
||||||
|
|
||||||
|
#ifdef HAS_I444TOARGBROW_AVX2
|
||||||
|
// 16 pixels
// Converts one row of I444 (separate Y, U and V planes) to ARGB using AVX2
// inline assembly, 16 pixels per loop iteration.
//   y_buf, u_buf, v_buf: source planes; READYUV444_AVX2 reads 16 bytes from
//                        each per iteration.
//   dst_argb:            destination row; written by STOREARGB_AVX2.
//   yuvconstants:        conversion constants handed to YUVTORGB_SETUP_AVX2
//                        and YUVTORGB_AVX2 (both defined outside this hunk).
//   width:               pixel count. The loop subtracts 16 and repeats while
//                        the remainder is > 0, so a width that is not a
//                        multiple of 16 is effectively rounded up.
|
||||||
|
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
|
||||||
|
void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* dst_argb,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width) {
|
||||||
|
asm volatile (
|
||||||
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
|
// Turn v_buf into the offset of V relative to U (v_buf -= u_buf), so only
|
||||||
|
// u_buf needs advancing inside READYUV444_AVX2.
"sub %[u_buf],%[v_buf] \n"
|
||||||
|
// Set every bit of ymm5; presumably the opaque alpha source consumed by
|
||||||
|
// STOREARGB_AVX2 -- confirm against that macro.
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
READYUV444_AVX2
|
||||||
|
YUVTORGB_AVX2(yuvconstants)
|
||||||
|
STOREARGB_AVX2
|
||||||
|
// 16 pixels consumed; loop while the remaining count is still positive.
"sub $0x10,%[width] \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
// Zero the upper halves of the ymm registers before returning.
"vzeroupper \n"
|
||||||
|
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||||
|
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||||
|
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||||
|
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||||
|
[width]"+rm"(width) // %[width]
|
||||||
|
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||||
|
: "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
|
||||||
|
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_I444TOARGBROW_AVX2
|
||||||
|
|
||||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||||
// 16 pixels
|
// 16 pixels
|
||||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user