Mirrow AVX2 functions for Visual C

Bug: libyuv:42280902
Change-Id: Iabbec9af3a4f4dd89294e60145823c7fc4dd6ec6
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7843378
Commit-Queue: Frank Barchard <fbarchard@google.com>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2026-05-15 13:52:46 -07:00
parent 0f320a03f7
commit cda55fcf53
14 changed files with 332 additions and 171 deletions

View File

@ -10,7 +10,7 @@ Libyuv uses a dispatch system where high-level conversion functions call optimiz
### x86 Architectures (32-bit and 64-bit)
* **row_gcc.cc**: **Master copy.** Contains inline assembly in GCC syntax for GCC and Clang. Supports SSE, AVX, and AVX512. AVX512 implementations are strictly for 64-bit targets.
* **row_gcc.cc**: **Master copy.** Contains inline assembly in GCC syntax for GCC and Clang. Supports AVX, and AVX512. AVX512 implementations are strictly for 64-bit targets.
* **row_win.cc**: Derivative of `row_gcc.cc`. Contains C++ intrinsics specifically for Visual C++ (MSVC). Can be tested with Clang using `-DLIBYUV_ENABLE_ROWWIN`.
* **Note**: Use either `row_gcc` or `row_win`, never both.
@ -33,10 +33,8 @@ Libyuv uses a dispatch system where high-level conversion functions call optimiz
## Coding Guidelines
1. **Maintain Parity**: When modifying `row_common.cc`, update corresponding optimizations. `row_gcc.cc` is the master; `row_win.cc` should be updated to match.
2. **Assembly Safety**: Ensure inline assembly in `row_gcc.cc` correctly preserves registers according to the ABI.
3. **AVX512 Logic**: AVX512 row functions are strictly enabled for **64-bit x86 only**.
4. **Feature Macros**: Use the `HAS_` macros in `include/libyuv/row.h` to enable or disable specific AVX512 versions.
1. **AVX512 Logic**: AVX512 row functions are strictly enabled for **64-bit x86 only**.
2. **Feature Macros**: Use the `HAS_` macros in `include/libyuv/row.h` to enable or disable specific AVX512 versions.
## Changelist (CL) & Commit Guidelines

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1938
Version: 1939
Revision: DEPS
License: BSD-3-Clause
License File: LICENSE

View File

@ -72,7 +72,6 @@ Additional commonly used compiler options can be passed to Bazel via `--copt`:
bazel build -c opt --config=android_arm64 \
--copt=-DLIBYUV_UNLIMITED_DATA \
--copt=-DLIBYUV_BIT_EXACT=1 \
--copt=-DENABLE_ROW_TESTS \
//:libyuv_test

View File

@ -142,8 +142,13 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__) || \
defined(_M_X64) || defined(_M_X86))
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_RGB24MIRRORROW_AVX2
#define HAS_ARGBTOUVMATRIXROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_MIRRORSPLITUVROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#endif
#if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_GCC) && \
@ -170,6 +175,7 @@ extern "C" {
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_MIRRORSPLITUVROW_AVX2
#define HAS_NV12TOARGBROW_AVX2
#define HAS_NV12TORGB24ROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
@ -236,7 +242,6 @@ extern "C" {
#define HAS_P410TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_AVX2
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
#define HAS_RGBATOYJROW_SSSE3
#define HAS_SPLITARGBROW_SSE2
#define HAS_SPLITARGBROW_SSSE3
@ -2939,12 +2944,11 @@ void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorSplitUVRow_SSSE3(const uint8_t* src,
void MirrorSplitUVRow_AVX2(const uint8_t* src,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -2983,14 +2987,14 @@ void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width);
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width);
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width);
void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr,
void RGB24MirrorRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1938
#define LIBYUV_VERSION 1939
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -2583,7 +2583,6 @@ void MirrorUVPlane(const uint8_t* src_uv,
#endif
#if defined(HAS_MIRRORUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
MirrorUVRow = MirrorUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
MirrorUVRow = MirrorUVRow_SSSE3;
}
@ -2823,11 +2822,11 @@ int RGB24Mirror(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24MirrorRow = RGB24MirrorRow_SSSE3;
#if defined(HAS_RGB24MIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RGB24MirrorRow = RGB24MirrorRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RGB24MirrorRow = RGB24MirrorRow_AVX2;
}
}
#endif

View File

@ -397,9 +397,9 @@ void SplitRotateUV180(const uint8_t* src,
MirrorSplitUVRow = MirrorSplitUVRow_NEON;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
MirrorSplitUVRow = MirrorSplitUVRow_SSSE3;
#if defined(HAS_MIRRORSPLITUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
MirrorSplitUVRow = MirrorSplitUVRow_AVX2;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_LSX)

View File

@ -1906,8 +1906,8 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t vin[64]); \
SIMD_ALIGNED(uint8_t vout[64]); \
SIMD_ALIGNED(uint8_t vin[128]); \
SIMD_ALIGNED(uint8_t vout[128]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@ -1938,9 +1938,6 @@ ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
#ifdef HAS_MIRRORUVROW_AVX2
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#ifdef HAS_MIRRORUVROW_SSSE3
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#ifdef HAS_MIRRORUVROW_NEON
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#endif
@ -1965,8 +1962,8 @@ ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
#ifdef HAS_ARGBMIRRORROW_LASX
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
#endif
#ifdef HAS_RGB24MIRRORROW_SSSE3
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#ifdef HAS_RGB24MIRRORROW_AVX2
ANY11M(RGB24MirrorRow_Any_AVX2, RGB24MirrorRow_AVX2, 3, 31)
#endif
#ifdef HAS_RGB24MIRRORROW_NEON
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)

View File

@ -37,10 +37,6 @@ extern "C" {
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
@ -3578,12 +3574,8 @@ const uint32_t fixed_invtbl8[256] = {
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
#if defined(LIBYUV_UNATTENUATE_DUP)
// This code mimics the Intel SIMD version for better testability.
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
#else
#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
#endif
// mimics the Intel SIMD code for exactness.
void ARGBUnattenuateRow_C(const uint8_t* src_argb,

View File

@ -21,6 +21,10 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__)) && \
!defined(LIBYUV_ENABLE_ROWWIN)
// Note: for avx and avx512 declare clobber as xmm registers due to
// clang for windows needing to preserve xmm registers but not saving
// them if declared as ymm or zmm.
#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
// Constants for ARGB
@ -268,11 +272,11 @@ static const uint32_t kPermdRAWToARGB_AVX512BW[16] = {
void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const uint32_t* shuffler, int width) {
asm volatile(
"vpternlogd $0xff,%%zmm22,%%zmm22,%%zmm22 \n" // 0xffffffff
"vpslld $0x18,%%zmm22,%%zmm22 \n" // 0xff000000
"vpternlogd $0xff,%%zmm6,%%zmm6,%%zmm6 \n" // 0xffffffff
"vpslld $0x18,%%zmm6,%%zmm6 \n" // 0xff000000
"movabs $0xffffffffffff,%%rax \n" // 48 bytes mask
"kmovq %%rax,%%k1 \n"
"vmovdqu32 %3,%%zmm21 \n"
"vmovdqu32 %3,%%zmm5 \n"
"vbroadcasti32x4 %4,%%zmm4 \n"
LABELALIGN //
@ -282,18 +286,18 @@ void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const uint
"vmovdqu8 96(%0),%%zmm2%{%%k1%}%{z%} \n"
"vmovdqu8 144(%0),%%zmm3%{%%k1%}%{z%} \n"
"lea 192(%0),%0 \n"
"vpermd %%zmm0,%%zmm21,%%zmm0 \n"
"vpermd %%zmm1,%%zmm21,%%zmm1 \n"
"vpermd %%zmm2,%%zmm21,%%zmm2 \n"
"vpermd %%zmm3,%%zmm21,%%zmm3 \n"
"vpermd %%zmm0,%%zmm5,%%zmm0 \n"
"vpermd %%zmm1,%%zmm5,%%zmm1 \n"
"vpermd %%zmm2,%%zmm5,%%zmm2 \n"
"vpermd %%zmm3,%%zmm5,%%zmm3 \n"
"vpshufb %%zmm4,%%zmm0,%%zmm0 \n"
"vpshufb %%zmm4,%%zmm1,%%zmm1 \n"
"vpshufb %%zmm4,%%zmm2,%%zmm2 \n"
"vpshufb %%zmm4,%%zmm3,%%zmm3 \n"
"vpord %%zmm22,%%zmm0,%%zmm0 \n"
"vpord %%zmm22,%%zmm1,%%zmm1 \n"
"vpord %%zmm22,%%zmm2,%%zmm2 \n"
"vpord %%zmm22,%%zmm3,%%zmm3 \n"
"vpord %%zmm6,%%zmm0,%%zmm0 \n"
"vpord %%zmm6,%%zmm1,%%zmm1 \n"
"vpord %%zmm6,%%zmm2,%%zmm2 \n"
"vpord %%zmm6,%%zmm3,%%zmm3 \n"
"vmovdqu32 %%zmm0,(%1) \n"
"vmovdqu32 %%zmm1,0x40(%1) \n"
"vmovdqu32 %%zmm2,0x80(%1) \n"
@ -307,7 +311,7 @@ void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const uint
"+r"(width) // %2
: "m"(kPermdRAWToARGB_AVX512BW), // %3
"m"(*shuffler) // %4
: "memory", "cc", "rax", "k1", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm21", "xmm22");
: "memory", "cc", "rax", "k1", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@ -1568,26 +1572,26 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb,
const struct ArgbConstants* c) {
asm volatile(
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n"
"vpsllw $15,%%zmm16,%%zmm21 \n"
"vpacksswb %%zmm21,%%zmm21,%%zmm21 \n"
"vpsllw $15,%%zmm16,%%zmm5 \n"
"vpacksswb %%zmm5,%%zmm5,%%zmm5 \n"
"vpsrlw $15,%%zmm16,%%zmm16 \n" // zmm16 = 1
"vbroadcasti64x4 0(%3),%%zmm4 \n"
"vbroadcasti64x4 0x60(%3),%%zmm23 \n"
"vpmaddubsw %%zmm21,%%zmm4,%%zmm22 \n"
"vpmaddwd %%zmm16,%%zmm22,%%zmm22 \n"
"vpackssdw %%zmm22,%%zmm22,%%zmm22 \n"
"vpsubw %%zmm22,%%zmm23,%%zmm23 \n"
"vmovups %4,%%zmm22 \n"
"vbroadcasti64x4 0x60(%3),%%zmm7 \n"
"vpmaddubsw %%zmm5,%%zmm4,%%zmm6 \n"
"vpmaddwd %%zmm16,%%zmm6,%%zmm6 \n"
"vpackssdw %%zmm6,%%zmm6,%%zmm6 \n"
"vpsubw %%zmm6,%%zmm7,%%zmm7 \n"
"vmovups %4,%%zmm6 \n"
LABELALIGN
"1: \n"
"vmovups (%0),%%zmm0 \n"
"vmovups 0x40(%0),%%zmm1 \n"
"vmovups 0x80(%0),%%zmm2 \n"
"vmovups 0xc0(%0),%%zmm3 \n"
"vpsubb %%zmm21,%%zmm0,%%zmm0 \n"
"vpsubb %%zmm21,%%zmm1,%%zmm1 \n"
"vpsubb %%zmm21,%%zmm2,%%zmm2 \n"
"vpsubb %%zmm21,%%zmm3,%%zmm3 \n"
"vpsubb %%zmm5,%%zmm0,%%zmm0 \n"
"vpsubb %%zmm5,%%zmm1,%%zmm1 \n"
"vpsubb %%zmm5,%%zmm2,%%zmm2 \n"
"vpsubb %%zmm5,%%zmm3,%%zmm3 \n"
"vpmaddubsw %%zmm0,%%zmm4,%%zmm0 \n"
"vpmaddubsw %%zmm1,%%zmm4,%%zmm1 \n"
"vpmaddubsw %%zmm2,%%zmm4,%%zmm2 \n"
@ -1599,12 +1603,12 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb,
"vpmaddwd %%zmm16,%%zmm2,%%zmm2 \n"
"vpmaddwd %%zmm16,%%zmm3,%%zmm3 \n"
"vpackssdw %%zmm3,%%zmm2,%%zmm2 \n"
"vpaddw %%zmm23,%%zmm0,%%zmm0 \n"
"vpaddw %%zmm23,%%zmm2,%%zmm2 \n"
"vpaddw %%zmm7,%%zmm0,%%zmm0 \n"
"vpaddw %%zmm7,%%zmm2,%%zmm2 \n"
"vpsrlw $0x8,%%zmm0,%%zmm0 \n"
"vpsrlw $0x8,%%zmm2,%%zmm2 \n"
"vpackuswb %%zmm2,%%zmm0,%%zmm0 \n"
"vpermd %%zmm0,%%zmm22,%%zmm0 \n"
"vpermd %%zmm0,%%zmm6,%%zmm0 \n"
"vmovups %%zmm0,(%1) \n"
"lea 0x40(%1),%1 \n"
"sub $0x40,%2 \n"
@ -1615,8 +1619,8 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb,
"+r"(width) // %2
: "r"(c), // %3
"m"(kPermdARGBToY_AVX512BW) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm16", "xmm21",
"xmm22", "xmm23");
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7", "xmm16");
}
#endif
@ -1773,8 +1777,8 @@ void ARGBToUV444MatrixRow_AVX512BW(const uint8_t* src_argb,
"vbroadcasti64x4 0x20(%4),%%zmm3 \n" // kRGBToU
"vbroadcasti64x4 0x40(%4),%%zmm4 \n" // kRGBToV
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" // -1
"vpsllw $15,%%zmm16,%%zmm21 \n" // 0x8000
"vmovups %5,%%zmm23 \n"
"vpsllw $15,%%zmm16,%%zmm5 \n" // 0x8000
"vmovups %5,%%zmm7 \n"
"sub %1,%2 \n"
LABELALIGN
@ -1782,45 +1786,45 @@ void ARGBToUV444MatrixRow_AVX512BW(const uint8_t* src_argb,
"vmovups (%0),%%zmm0 \n"
"vmovups 0x40(%0),%%zmm1 \n"
"vmovups 0x80(%0),%%zmm2 \n"
"vmovups 0xc0(%0),%%zmm22 \n"
"vmovups 0xc0(%0),%%zmm6 \n"
"vpmaddubsw %%zmm3,%%zmm0,%%zmm0 \n"
"vpmaddubsw %%zmm3,%%zmm1,%%zmm1 \n"
"vpmaddubsw %%zmm3,%%zmm2,%%zmm2 \n"
"vpmaddubsw %%zmm3,%%zmm22,%%zmm22 \n"
"vpmaddubsw %%zmm3,%%zmm6,%%zmm6 \n"
"vpmaddwd %%zmm16,%%zmm0,%%zmm0 \n"
"vpmaddwd %%zmm16,%%zmm1,%%zmm1 \n"
"vpmaddwd %%zmm16,%%zmm2,%%zmm2 \n"
"vpmaddwd %%zmm16,%%zmm22,%%zmm22 \n"
"vpmaddwd %%zmm16,%%zmm6,%%zmm6 \n"
"vpackssdw %%zmm1,%%zmm0,%%zmm0 \n" // mutates
"vpackssdw %%zmm22,%%zmm2,%%zmm2 \n"
"vpsubw %%zmm21,%%zmm0,%%zmm0 \n"
"vpsubw %%zmm21,%%zmm2,%%zmm2 \n"
"vpackssdw %%zmm6,%%zmm2,%%zmm2 \n"
"vpsubw %%zmm5,%%zmm0,%%zmm0 \n"
"vpsubw %%zmm5,%%zmm2,%%zmm2 \n"
"vpsrlw $0x8,%%zmm0,%%zmm0 \n"
"vpsrlw $0x8,%%zmm2,%%zmm2 \n"
"vpackuswb %%zmm2,%%zmm0,%%zmm0 \n" // mutates
"vpermd %%zmm0,%%zmm23,%%zmm0 \n" // unmutate.
"vpermd %%zmm0,%%zmm7,%%zmm0 \n" // unmutate.
"vmovups %%zmm0,(%1) \n"
"vmovups (%0),%%zmm0 \n"
"vmovups 0x40(%0),%%zmm1 \n"
"vmovups 0x80(%0),%%zmm2 \n"
"vmovups 0xc0(%0),%%zmm22 \n"
"vmovups 0xc0(%0),%%zmm6 \n"
"vpmaddubsw %%zmm4,%%zmm0,%%zmm0 \n"
"vpmaddubsw %%zmm4,%%zmm1,%%zmm1 \n"
"vpmaddubsw %%zmm4,%%zmm2,%%zmm2 \n"
"vpmaddubsw %%zmm4,%%zmm22,%%zmm22 \n"
"vpmaddubsw %%zmm4,%%zmm6,%%zmm6 \n"
"vpmaddwd %%zmm16,%%zmm0,%%zmm0 \n"
"vpmaddwd %%zmm16,%%zmm1,%%zmm1 \n"
"vpmaddwd %%zmm16,%%zmm2,%%zmm2 \n"
"vpmaddwd %%zmm16,%%zmm22,%%zmm22 \n"
"vpmaddwd %%zmm16,%%zmm6,%%zmm6 \n"
"vpackssdw %%zmm1,%%zmm0,%%zmm0 \n" // mutates
"vpackssdw %%zmm22,%%zmm2,%%zmm2 \n"
"vpsubw %%zmm21,%%zmm0,%%zmm0 \n"
"vpsubw %%zmm21,%%zmm2,%%zmm2 \n"
"vpackssdw %%zmm6,%%zmm2,%%zmm2 \n"
"vpsubw %%zmm5,%%zmm0,%%zmm0 \n"
"vpsubw %%zmm5,%%zmm2,%%zmm2 \n"
"vpsrlw $0x8,%%zmm0,%%zmm0 \n"
"vpsrlw $0x8,%%zmm2,%%zmm2 \n"
"vpackuswb %%zmm2,%%zmm0,%%zmm0 \n" // mutates
"vpermd %%zmm0,%%zmm23,%%zmm0 \n" // unmutate.
"vpermd %%zmm0,%%zmm7,%%zmm0 \n" // unmutate.
"vmovups %%zmm0,(%1,%2,1) \n"
"lea 0x100(%0),%0 \n"
"lea 0x40(%1),%1 \n"
@ -1837,8 +1841,8 @@ void ARGBToUV444MatrixRow_AVX512BW(const uint8_t* src_argb,
#endif
: "r"(c), // %4
"m"(kPermdARGBToY_AVX512BW) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm16", "xmm21",
"xmm22", "xmm23");
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7", "xmm16");
}
#endif // HAS_ARGBTOUV444ROW_AVX512BW
@ -2233,11 +2237,11 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
const struct ArgbConstants* c) {
asm volatile(
"vbroadcasti64x4 0x20(%5),%%zmm4 \n" // RGBToU
"vbroadcasti64x4 0x40(%5),%%zmm21 \n" // RGBToV
"vbroadcasti64x4 0x40(%5),%%zmm5 \n" // RGBToV
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n"
"vpabsb %%zmm16,%%zmm22 \n" // 0x0101
"vpabsb %%zmm16,%%zmm6 \n" // 0x0101
"vpsllw $15,%%zmm16,%%zmm17 \n" // 0x8000
"vbroadcasti64x4 %6,%%zmm23 \n" // kShuffleAARRGGBB
"vbroadcasti64x4 %6,%%zmm7 \n" // kShuffleAARRGGBB
"vmovups %7,%%zmm18 \n" // kPermdARGBToY_AVX512BW
"vmovups %8,%%zmm19 \n" // kPermdARGBToUV_AVX512BW
"sub %1,%2 \n"
@ -2248,14 +2252,14 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
"vmovups 0x40(%0),%%zmm1 \n"
"vmovups 0x00(%0,%4,1),%%zmm2 \n"
"vmovups 0x40(%0,%4,1),%%zmm3 \n"
"vpshufb %%zmm23,%%zmm0,%%zmm0 \n" // aarrggbb
"vpshufb %%zmm23,%%zmm1,%%zmm1 \n"
"vpshufb %%zmm23,%%zmm2,%%zmm2 \n"
"vpshufb %%zmm23,%%zmm3,%%zmm3 \n"
"vpmaddubsw %%zmm22,%%zmm0,%%zmm0 \n" // 32x2 -> 16x2
"vpmaddubsw %%zmm22,%%zmm1,%%zmm1 \n"
"vpmaddubsw %%zmm22,%%zmm2,%%zmm2 \n"
"vpmaddubsw %%zmm22,%%zmm3,%%zmm3 \n"
"vpshufb %%zmm7,%%zmm0,%%zmm0 \n" // aarrggbb
"vpshufb %%zmm7,%%zmm1,%%zmm1 \n"
"vpshufb %%zmm7,%%zmm2,%%zmm2 \n"
"vpshufb %%zmm7,%%zmm3,%%zmm3 \n"
"vpmaddubsw %%zmm6,%%zmm0,%%zmm0 \n" // 32x2 -> 16x2
"vpmaddubsw %%zmm6,%%zmm1,%%zmm1 \n"
"vpmaddubsw %%zmm6,%%zmm2,%%zmm2 \n"
"vpmaddubsw %%zmm6,%%zmm3,%%zmm3 \n"
"vpaddw %%zmm0,%%zmm2,%%zmm0 \n" // 16x2 -> 16x1
"vpaddw %%zmm1,%%zmm3,%%zmm1 \n"
"vpxorq %%zmm2,%%zmm2,%%zmm2 \n" // 0 for vpavgw
@ -2267,7 +2271,7 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
"vpermd %%zmm0,%%zmm19,%%zmm0 \n" // unscramble pixels
"vpmaddubsw %%zmm4,%%zmm0,%%zmm1 \n" // 16 U
"vpmaddubsw %%zmm21,%%zmm0,%%zmm0 \n" // 16 V
"vpmaddubsw %%zmm5,%%zmm0,%%zmm0 \n" // 16 V
"vpmaddwd %%zmm16,%%zmm1,%%zmm1 \n"
"vpmaddwd %%zmm16,%%zmm0,%%zmm0 \n"
"vpackssdw %%zmm0,%%zmm1,%%zmm0 \n" // mutates (U in lower, V in upper)
@ -2298,8 +2302,8 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
"m"(kShuffleAARRGGBB), // %6
"m"(kPermdARGBToY_AVX512BW), // %7
"m"(kPermdARGBToUV_AVX512BW) // %8
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm16", "xmm17",
"xmm18", "xmm19", "xmm21", "xmm22", "xmm23");
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7", "xmm16", "xmm17", "xmm18", "xmm19");
}
void ARGBToUVRow_AVX512BW(const uint8_t* src_argb,
@ -3626,14 +3630,14 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vmovdqa 128(%[yuvconstants]),%%ymm12 \n"
#define YUVTORGB_SETUP_AVX512BW(yuvconstants) \
"vpternlogd $0xff,%%zmm29,%%zmm29,%%zmm29 \n" \
"vpbroadcastq (%[yuvconstants]),%%zmm24 \n" \
"vpabsb %%zmm29,%%zmm29 \n" \
"vpsllw $7,%%zmm29,%%zmm29 \n" \
"vpbroadcastq 32(%[yuvconstants]),%%zmm25 \n" \
"vpbroadcastq 64(%[yuvconstants]),%%zmm26 \n" \
"vpbroadcastq 96(%[yuvconstants]),%%zmm27 \n" \
"vpbroadcastq 128(%[yuvconstants]),%%zmm28 \n" \
"vpternlogd $0xff,%%zmm13,%%zmm13,%%zmm13 \n" \
"vpbroadcastq (%[yuvconstants]),%%zmm8 \n" \
"vpabsb %%zmm13,%%zmm13 \n" \
"vpsllw $7,%%zmm13,%%zmm13 \n" \
"vpbroadcastq 32(%[yuvconstants]),%%zmm9 \n" \
"vpbroadcastq 64(%[yuvconstants]),%%zmm10 \n" \
"vpbroadcastq 96(%[yuvconstants]),%%zmm11 \n" \
"vpbroadcastq 128(%[yuvconstants]),%%zmm12 \n" \
"vmovups (%[quadsplitperm]),%%zmm16 \n" \
"vmovups (%[dquadsplitperm]),%%zmm17 \n" \
"vmovups (%[unperm]),%%zmm18 \n"
@ -3650,12 +3654,12 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpaddsw %%ymm4,%%ymm2,%%ymm2 \n"
#define YUVTORGB16_AVX512BW(yuvconstants) \
"vpsubb %%zmm29,%%zmm3,%%zmm3 \n" \
"vpmulhuw %%zmm27,%%zmm4,%%zmm4 \n" \
"vpmaddubsw %%zmm3,%%zmm24,%%zmm0 \n" \
"vpmaddubsw %%zmm3,%%zmm25,%%zmm1 \n" \
"vpmaddubsw %%zmm3,%%zmm26,%%zmm2 \n" \
"vpaddw %%zmm4,%%zmm28,%%zmm4 \n" \
"vpsubb %%zmm13,%%zmm3,%%zmm3 \n" \
"vpmulhuw %%zmm11,%%zmm4,%%zmm4 \n" \
"vpmaddubsw %%zmm3,%%zmm8,%%zmm0 \n" \
"vpmaddubsw %%zmm3,%%zmm9,%%zmm1 \n" \
"vpmaddubsw %%zmm3,%%zmm10,%%zmm2 \n" \
"vpaddw %%zmm4,%%zmm12,%%zmm4 \n" \
"vpaddsw %%zmm4,%%zmm0,%%zmm0 \n" \
"vpsubsw %%zmm1,%%zmm4,%%zmm1 \n" \
"vpaddsw %%zmm4,%%zmm2,%%zmm2 \n"
@ -3722,7 +3726,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
#define STOREARGB_AVX512BW \
"vpunpcklbw %%zmm1,%%zmm0,%%zmm0 \n" \
"vpermq %%zmm0,%%zmm18,%%zmm0 \n" \
"vpunpcklbw %%zmm21,%%zmm2,%%zmm2 \n" \
"vpunpcklbw %%zmm5,%%zmm2,%%zmm2 \n" \
"vpermq %%zmm2,%%zmm18,%%zmm2 \n" \
"vpunpcklwd %%zmm2,%%zmm0,%%zmm1 \n" \
"vpunpckhwd %%zmm2,%%zmm0,%%zmm0 \n" \
@ -3844,7 +3848,7 @@ void OMITFP I422ToARGBRow_AVX512BW(const uint8_t* y_buf,
YUVTORGB_SETUP_AVX512BW(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%xmm5,%%xmm5,%%xmm5 \n"
"vpbroadcastq %%xmm5,%%zmm21 \n"
"vpbroadcastq %%xmm5,%%zmm5 \n"
LABELALIGN
"1: \n"
@ -4685,6 +4689,43 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
}
#endif // HAS_MIRRORROW_AVX2
#ifdef HAS_MIRRORSPLITUVROW_AVX2
// Shuffle table for reversing the bytes of UV channels.
static const uvec8 kShuffleMirrorSplitUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
void MirrorSplitUVRow_AVX2(const uint8_t* src,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ptrdiff_t temp_width = (ptrdiff_t)(width);
asm volatile(
"vbroadcasti128 %4,%%ymm1 \n"
"lea -0x20(%0,%3,2),%0 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"lea -0x20(%0),%0 \n"
"vpshufb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0x72,%%ymm0,%%ymm0 \n"
"vextracti128 $0x1,%%ymm0,%%xmm2 \n"
"vmovdqu %%xmm0,(%1) \n"
"vmovdqu %%xmm2,0x00(%1,%2,1) \n"
"lea 0x10(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(temp_width) // %3
: "m"(kShuffleMirrorSplitUV) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MIRRORSPLITUVROW_AVX2
#ifdef HAS_MIRRORUVROW_SSSE3
// Shuffle table for reversing the UV.
static const uvec8 kShuffleMirrorUV = {14u, 15u, 12u, 13u, 10u, 11u, 8u, 9u,
@ -4733,38 +4774,7 @@ void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
}
#endif // HAS_MIRRORUVROW_AVX2
#ifdef HAS_MIRRORSPLITUVROW_SSSE3
// Shuffle table for reversing the bytes of UV channels.
static const uvec8 kShuffleMirrorSplitUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
void MirrorSplitUVRow_SSSE3(const uint8_t* src,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ptrdiff_t temp_width = (ptrdiff_t)(width);
asm volatile(
"movdqa %4,%%xmm1 \n"
"lea -0x10(%0,%3,2),%0 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"lea -0x10(%0),%0 \n"
"pshufb %%xmm1,%%xmm0 \n"
"movlpd %%xmm0,(%1) \n"
"movhpd %%xmm0,0x00(%1,%2,1) \n"
"lea 0x8(%1),%1 \n"
"sub $8,%3 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(temp_width) // %3
: "m"(kShuffleMirrorSplitUV) // %4
: "memory", "cc", "xmm0", "xmm1");
}
#endif // HAS_MIRRORSPLITUVROW_SSSE3
#ifdef HAS_RGB24MIRRORROW_SSSE3
@ -4813,6 +4823,60 @@ void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
}
#endif // HAS_RGB24MIRRORROW_SSSE3
#ifdef HAS_RGB24MIRRORROW_AVX2
// Shuffle first 10 pixels to last 10 mirrored. first byte zero
static const uvec8 kShuffleMirrorRGB0_AVX = {
128u, 12u, 13u, 14u, 9u, 10u, 11u, 6u, 7u, 8u, 3u, 4u, 5u, 0u, 1u, 2u
};
// Shuffle last 2 pixels to first 2 mirrored. last byte zero
static const uvec8 kShuffleMirrorRGB1_AVX = {
13u, 14u, 15u, 10u, 11u, 12u, 7u, 8u, 9u, 4u, 5u, 6u, 1u, 2u, 3u, 128u
};
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width) {
ptrdiff_t temp_width = (ptrdiff_t)(width);
src_rgb24 += width * 3 - 96;
asm volatile(
"vbroadcasti128 %3,%%ymm4 \n"
"vmovdqa %4,%%xmm5 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%xmm0 \n" // first 10
"vinserti128 $1,15(%0),%%ymm0,%%ymm0 \n"
"vmovdqu 30(%0),%%xmm1 \n" // next 10
"vinserti128 $1,45(%0),%%ymm1,%%ymm1 \n"
"vmovdqu 60(%0),%%xmm2 \n" // next 10
"vinserti128 $1,75(%0),%%ymm2,%%ymm2 \n"
"vmovdqu 80(%0),%%xmm3 \n" // last 2 special
"vpshufb %%ymm4,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm4,%%ymm1,%%ymm1 \n"
"vpshufb %%ymm4,%%ymm2,%%ymm2 \n"
"vpshufb %%xmm5,%%xmm3,%%xmm3 \n"
"lea -0x60(%0),%0 \n"
"vmovdqu %%xmm0,80(%1) \n"
"vextracti128 $1,%%ymm0,65(%1) \n"
"vmovdqu %%xmm1,50(%1) \n"
"vextracti128 $1,%%ymm1,35(%1) \n"
"vmovdqu %%xmm2,20(%1) \n"
"vextracti128 $1,%%ymm2,5(%1) \n"
"vmovq %%xmm3,0(%1) \n"
"lea 0x60(%1),%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_rgb24), // %0
"+r"(dst_rgb24), // %1
"+r"(temp_width) // %2
: "m"(kShuffleMirrorRGB0_AVX), // %3
"m"(kShuffleMirrorRGB1_AVX) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_RGB24MIRRORROW_AVX2
#ifdef HAS_ARGBMIRRORROW_SSE2
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {

View File

@ -470,6 +470,128 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
}
#endif
#ifdef HAS_MIRRORROW_AVX2
LIBYUV_TARGET_AVX2
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
__m256i ymm_shuf =
_mm256_broadcastsi128_si256(_mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
src += width;
while (width > 0) {
src -= 32;
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src);
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
ymm0 = _mm256_permute4x64_epi64(ymm0, 0x4e);
_mm256_storeu_si256((__m256i*)dst, ymm0);
dst += 32;
width -= 32;
}
}
#endif
#ifdef HAS_MIRRORUVROW_AVX2
LIBYUV_TARGET_AVX2
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
__m256i ymm_shuf =
_mm256_broadcastsi128_si256(_mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1));
src_uv += width * 2;
while (width > 0) {
src_uv -= 32;
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_uv);
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
ymm0 = _mm256_permute4x64_epi64(ymm0, 0x4e);
_mm256_storeu_si256((__m256i*)dst_uv, ymm0);
dst_uv += 32;
width -= 16;
}
}
#endif
#ifdef HAS_MIRRORSPLITUVROW_AVX2
LIBYUV_TARGET_AVX2
void MirrorSplitUVRow_AVX2(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
__m256i ymm_shuf =
_mm256_broadcastsi128_si256(_mm_setr_epi8(14, 12, 10, 8, 6, 4, 2, 0, 15, 13, 11, 9, 7, 5, 3, 1));
src_uv += width * 2;
while (width > 0) {
src_uv -= 32;
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_uv);
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
ymm0 = _mm256_permute4x64_epi64(ymm0, 0x72);
_mm_storeu_si128((__m128i*)dst_u, _mm256_castsi256_si128(ymm0));
_mm_storeu_si128((__m128i*)dst_v, _mm256_extracti128_si256(ymm0, 1));
dst_u += 16;
dst_v += 16;
width -= 16;
}
}
#endif
#ifdef HAS_RGB24MIRRORROW_AVX2
LIBYUV_TARGET_AVX2
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width) {
__m256i shuf0 = _mm256_setr_epi8(
-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2,
-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2);
__m128i shuf1 = _mm_setr_epi8(
13, 14, 15, 10, 11, 12, 7, 8, 9, 4, 5, 6, 1, 2, 3, -1);
src_rgb24 += width * 3 - 96;
while (width > 0) {
__m128i v0_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 0));
__m128i v0_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 15));
__m256i v0 = _mm256_inserti128_si256(_mm256_castsi128_si256(v0_lo), v0_hi, 1);
__m128i v1_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 30));
__m128i v1_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 45));
__m256i v1 = _mm256_inserti128_si256(_mm256_castsi128_si256(v1_lo), v1_hi, 1);
__m128i v2_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 60));
__m128i v2_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 75));
__m256i v2 = _mm256_inserti128_si256(_mm256_castsi128_si256(v2_lo), v2_hi, 1);
__m128i v3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 80));
v0 = _mm256_shuffle_epi8(v0, shuf0);
v1 = _mm256_shuffle_epi8(v1, shuf0);
v2 = _mm256_shuffle_epi8(v2, shuf0);
v3 = _mm_shuffle_epi8(v3, shuf1);
_mm_storeu_si128((__m128i*)(dst_rgb24 + 80), _mm256_castsi256_si128(v0));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 65), _mm256_extracti128_si256(v0, 1));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 50), _mm256_castsi256_si128(v1));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 35), _mm256_extracti128_si256(v1, 1));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 20), _mm256_castsi256_si128(v2));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 5), _mm256_extracti128_si256(v2, 1));
_mm_storel_epi64((__m128i*)(dst_rgb24 + 0), v3);
src_rgb24 -= 96;
dst_rgb24 += 96;
width -= 32;
}
}
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
LIBYUV_TARGET_AVX2
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
__m256i ymm_shuf = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
src += width * 4;
while (width > 0) {
src -= 32;
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src);
ymm0 = _mm256_permutevar8x32_epi32(ymm0, ymm_shuf);
_mm256_storeu_si256((__m256i*)dst, ymm0);
dst += 32;
width -= 8;
}
}
#endif
#endif
#ifdef __cplusplus

View File

@ -22,14 +22,8 @@ namespace libyuv {
// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB.
// Port to Visual C and other CPUs
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__))
#define ERROR_FULL 5
#define ERROR_J420 4
#else
#define ERROR_FULL 6
#define ERROR_J420 6
#endif
#define ERROR_R 1
#define ERROR_G 1
#ifdef LIBYUV_UNLIMITED_DATA

View File

@ -2050,7 +2050,6 @@ TEST_F(LibYUVConvertTest, MM21ToYUY2) {
}
// Test RGB24 to J420 is exact
#if defined(LIBYUV_BIT_EXACT)
TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
const int kSize = 256;
align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
@ -2075,10 +2074,8 @@ TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
free_aligned_buffer_page_end(orig_rgb24);
free_aligned_buffer_page_end(dest_j420);
}
#endif
// Test RGB24 to I420 is exact
#if defined(LIBYUV_BIT_EXACT)
TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
const int kSize = 256;
align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
@ -2103,7 +2100,6 @@ TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
free_aligned_buffer_page_end(orig_rgb24);
free_aligned_buffer_page_end(dest_i420);
}
#endif
TEST_F(LibYUVConvertTest, TestJ420ToI420) {
const uint8_t src_y[12] = {0, 0, 128, 128, 255, 255,

View File

@ -29,11 +29,7 @@
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#endif
#if defined(LIBYUV_BIT_EXACT)
#define EXPECTED_UNATTENUATE_DIFF 0
#else
#define EXPECTED_UNATTENUATE_DIFF 2
#endif
namespace libyuv {
@ -284,28 +280,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
ASSERT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
ASSERT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
ASSERT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
ASSERT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
ASSERT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
ASSERT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
ASSERT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
ASSERT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {