DetilePlane_16 AVX version

- fix ifdefs for DetilePlane_16 to use 16 bit versions, not 8 bit.  (no functional change)

Bug: b/258474032
Change-Id: Ic07e02d9801e21126ebee0ceb5779aa712a493ce
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4034812
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2022-11-18 14:13:22 -08:00 committed by libyuv LUCI CQ
parent 8713ba3f0b
commit ea26d7adb1
4 changed files with 52 additions and 2 deletions

View File

@ -291,6 +291,7 @@ extern "C" {
#define HAS_CONVERT8TO16ROW_SSE2
#define HAS_DETILEROW_SSE2
#define HAS_DETILEROW_16_SSE2
#define HAS_DETILEROW_16_AVX
#define HAS_DETILESPLITUVROW_SSSE3
#define HAS_DETILETOYUY2_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
@ -2030,6 +2031,14 @@ void DetileRow_Any_SSE2(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
// Prototypes for an AVX detile row function operating on 8-bit (uint8_t)
// samples, and for its _Any_ counterpart with the same parameter list.
// NOTE(review): no definition of either function and no HAS_DETILEROW_AVX
// macro is visible alongside these declarations - confirm both are
// implemented before any caller references them.
void DetileRow_AVX(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
void DetileRow_Any_AVX(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
void DetileRow_16_C(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
@ -2050,6 +2059,14 @@ void DetileRow_16_Any_SSE2(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
int width);
// Prototypes for the AVX detile row function operating on 16-bit (uint16_t)
// samples, and for its _Any_ counterpart with the same parameter list.
// DetilePlane_16 installs the _Any_ version when the AVX CPU flag is set and
// switches to DetileRow_16_AVX when width is a multiple of 16.
void DetileRow_16_AVX(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
int width);
void DetileRow_16_Any_AVX(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
int width);
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,

View File

@ -1002,7 +1002,7 @@ int DetilePlane_16(const uint16_t* src_y,
dst_stride_y = -dst_stride_y;
}
#if defined(HAS_DETILEROW_SSE2)
#if defined(HAS_DETILEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
DetileRow_16 = DetileRow_16_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
@ -1010,7 +1010,15 @@ int DetilePlane_16(const uint16_t* src_y,
}
}
#endif
#if defined(HAS_DETILEROW_NEON)
#if defined(HAS_DETILEROW_16_AVX)
if (TestCpuFlag(kCpuHasAVX)) {
DetileRow_16 = DetileRow_16_Any_AVX;
if (IS_ALIGNED(width, 16)) {
DetileRow_16 = DetileRow_16_AVX;
}
}
#endif
#if defined(HAS_DETILEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
DetileRow_16 = DetileRow_16_Any_NEON;
if (IS_ALIGNED(width, 16)) {

View File

@ -2268,6 +2268,9 @@ ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
#ifdef HAS_DETILEROW_16_SSE2
ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
#endif
#ifdef HAS_DETILEROW_16_AVX
// Instantiates DetileRow_16_Any_AVX on top of DetileRow_16_AVX.
// NOTE(review): the trailing arguments (uint16_t, 2, 15) presumably give the
// element type, bytes per element and a width mask of 15 (16 samples per SIMD
// pass) - confirm against the ANYDETILE macro definition.
ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15)
#endif
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \
void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride, \

View File

@ -5051,6 +5051,28 @@ void DetileRow_16_SSE2(const uint16_t* src,
}
#endif // HAS_DETILEROW_SSE2
#ifdef HAS_DETILEROW_16_AVX
// Copies 16-bit samples from src to dst in chunks of 16, using 256-bit
// unaligned loads and stores.
//
// Each loop pass loads 32 bytes (16 uint16_t values) from src, stores them to
// dst, advances src by src_tile_stride elements (the lea scales the stride by
// 2 bytes) and advances dst by 32 bytes.
//
//   src:             read pointer.
//   src_tile_stride: distance between consecutive 16-sample reads, counted in
//                    uint16_t elements.
//   dst:             write pointer; filled contiguously.
//   width:           number of samples to copy. The loop body runs before the
//                    count is tested and subtracts 16 per pass, so at least 16
//                    samples are always copied and a width that is not a
//                    multiple of 16 is rounded up to the next multiple.
void DetileRow_16_AVX(const uint16_t* src,
ptrdiff_t src_tile_stride,
uint16_t* dst,
int width) {
asm volatile(
"1: \n"
// Load 16 samples (32 bytes) from the current source position.
"vmovdqu (%0),%%ymm0 \n"
// src += src_tile_stride * 2 bytes.
"lea (%0,%3,2),%0 \n"
// Store the 16 samples and step dst past them.
"vmovdqu %%ymm0,(%1) \n"
"lea 0x20(%1),%1 \n"
// width -= 16; repeat while the remaining count is still positive.
"sub $0x10,%2 \n"
"jg 1b \n"
// Zero the upper halves of the ymm registers before returning.
"vzeroupper \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"(src_tile_stride) // %3
// NOTE(review): the clobber list names xmm0 while the code writes ymm0;
// both name the same vector register - confirm the compiler treats the
// xmm0 clobber as covering the full ymm0.
: "cc", "memory", "xmm0");
}
#endif // HAS_DETILEROW_16_AVX
#ifdef HAS_DETILETOYUY2_SSE2
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_SSE2(const uint8_t* src_y,