mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
SwapUV AVX2 and SSSE3
Based on ARGBShuffle but with count adjusted and new shuffle mask BUG=libyuv:809 Change-Id: Idd936ee6bedcf285607a68c2fc54d876b4becc01 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1711882 Reviewed-by: richard winterton <rrwinterton@gmail.com> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
22ae4bfa05
commit
fec9121b67
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1733
|
||||
Version: 1734
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -275,6 +275,7 @@ extern "C" {
|
||||
#define HAS_I422TOAR30ROW_SSSE3
|
||||
#define HAS_MERGERGBROW_SSSE3
|
||||
#define HAS_SPLITRGBROW_SSSE3
|
||||
#define HAS_SWAPUVROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available for AVX2 gcc/clang x86 platforms:
|
||||
@ -295,6 +296,7 @@ extern "C" {
|
||||
#define HAS_I422TOYUY2ROW_AVX2
|
||||
#define HAS_MERGEUVROW_16_AVX2
|
||||
#define HAS_MULTIPLYROW_16_AVX2
|
||||
#define HAS_SWAPUVROW_AVX2
|
||||
// TODO(fbarchard): Fix AVX2 version of YUV24
|
||||
// #define HAS_NV21TOYUV24ROW_AVX2
|
||||
#endif
|
||||
@ -3374,6 +3376,10 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
|
||||
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void SwapUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void SwapUVRow_Any_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void SwapUVRow_Any_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
||||
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
|
||||
void AYUVToUVRow_C(const uint8_t* src_ayuv,
|
||||
int stride_ayuv,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1733
|
||||
#define LIBYUV_VERSION 1734
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -527,6 +527,22 @@ void SwapUVPlane(const uint8_t* src_uv,
|
||||
src_stride_uv = dst_stride_vu = 0;
|
||||
}
|
||||
|
||||
#if defined(HAS_SWAPUVROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
SwapUVRow = SwapUVRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
SwapUVRow = SwapUVRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SWAPUVROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
SwapUVRow = SwapUVRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
SwapUVRow = SwapUVRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SWAPUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
SwapUVRow = SwapUVRow_Any_NEON;
|
||||
|
||||
@ -710,6 +710,12 @@ ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
|
||||
#ifdef HAS_AYUVTOYROW_NEON
|
||||
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_SWAPUVROW_SSSE3
|
||||
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_SWAPUVROW_AVX2
|
||||
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
|
||||
#endif
|
||||
#ifdef HAS_SWAPUVROW_NEON
|
||||
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
|
||||
#endif
|
||||
|
||||
@ -6790,6 +6790,68 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
|
||||
}
|
||||
#endif // HAS_NV21TOYUV24ROW_AVX2
|
||||
|
||||
#ifdef HAS_SWAPUVROW_SSSE3
|
||||
|
||||
// Shuffle table for swapping the two bytes of each adjacent pair (UV -> VU).
|
||||
static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u,
|
||||
9u, 8u, 11u, 10u, 13u, 12u, 15u, 14u};
|
||||
|
||||
// Copies a row from src_uv to dst_vu, swapping the two bytes of every
// adjacent byte pair (byte 0 <-> 1, 2 <-> 3, ...) using the SSSE3 pshufb
// instruction with the kShuffleUVToVU mask.
//
// src_uv: source row, read with unaligned loads.
// dst_vu: destination row, written with unaligned stores.
// width:  count of 2-byte pairs; it is decremented by 16 for each 32 bytes
//         processed.
//
// The loop body runs before the count is tested and always handles a full
// 32-byte chunk, so a width that is not a positive multiple of 16 makes it
// read and write past width pairs.
void SwapUVRow_SSSE3(const uint8_t* src_uv,
|
||||
uint8_t* dst_vu,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
"movdqu %3,%%xmm5 \n"  // xmm5 = pair-swap shuffle mask
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"  // load source bytes 0..15
|
||||
"movdqu 0x10(%0),%%xmm1 \n"  // load source bytes 16..31
|
||||
"lea 0x20(%0),%0 \n"  // src_uv += 32
|
||||
"pshufb %%xmm5,%%xmm0 \n"  // swap the bytes within each pair
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"movdqu %%xmm0,(%1) \n"  // store destination bytes 0..15
|
||||
"movdqu %%xmm1,0x10(%1) \n"  // store destination bytes 16..31
|
||||
"lea 0x20(%1),%1 \n"  // dst_vu += 32
|
||||
"sub $0x10,%2 \n"  // width -= 16 pairs (32 bytes)
|
||||
"jg 1b \n"  // repeat while width > 0
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_vu), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleUVToVU) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm5");
|
||||
}
|
||||
#endif // HAS_SWAPUVROW_SSSE3
|
||||
|
||||
#ifdef HAS_SWAPUVROW_AVX2
|
||||
// Copies a row from src_uv to dst_vu, swapping the two bytes of every
// adjacent byte pair, using the AVX2 vpshufb instruction with the
// kShuffleUVToVU mask replicated into both 128-bit halves of ymm5.
//
// src_uv: source row, read with unaligned loads.
// dst_vu: destination row, written with unaligned stores.
// width:  count of 2-byte pairs; it is decremented by 32 for each 64 bytes
//         processed.
//
// The loop body runs before the count is tested and always handles a full
// 64-byte chunk, so a width that is not a positive multiple of 32 makes it
// read and write past width pairs.
//
// NOTE(review): kShuffleUVToVU is defined inside the HAS_SWAPUVROW_SSSE3
// guard - confirm HAS_SWAPUVROW_AVX2 is never defined without it.
void SwapUVRow_AVX2(const uint8_t* src_uv,
|
||||
uint8_t* dst_vu,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
"vbroadcastf128 %3,%%ymm5 \n"  // ymm5 = 16-byte mask in both lanes
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"  // load source bytes 0..31
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n"  // load source bytes 32..63
|
||||
"lea 0x40(%0),%0 \n"  // src_uv += 64
|
||||
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n"  // swap the bytes within each pair
|
||||
"vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"  // store destination bytes 0..31
|
||||
"vmovdqu %%ymm1,0x20(%1) \n"  // store destination bytes 32..63
|
||||
"lea 0x40(%1),%1 \n"  // dst_vu += 64
|
||||
"sub $0x20,%2 \n"  // width -= 32 pairs (64 bytes)
|
||||
"jg 1b \n"  // repeat while width > 0
|
||||
"vzeroupper \n"  // clear upper ymm halves before returning
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_vu), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleUVToVU) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm5");
|
||||
}
|
||||
#endif // HAS_SWAPUVROW_AVX2
|
||||
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -994,6 +994,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
|
||||
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
|
||||
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
|
||||
// TODO(fbarchard): Investigate high error on Win32.
|
||||
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 10)
|
||||
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
|
||||
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user