mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
ARGBAttenuate AVX2 rewritten to match NEON/C code
Bug: 665 Change-Id: If26fb389dabbca870a0e720f5258d6c9b2cde156 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2196904 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
d13db1b437
commit
84da59c168
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1753
|
||||
Version: 1754
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -143,7 +143,6 @@ extern "C" {
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_SSE2
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||
#define HAS_ARGBBLENDROW_SSSE3
|
||||
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
@ -231,7 +230,6 @@ extern "C" {
|
||||
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_AVX2
|
||||
#define HAS_ARGBATTENUATEROW_AVX2
|
||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||
@ -270,6 +268,7 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
#define HAS_ABGRTOAR30ROW_SSSE3
|
||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||
#define HAS_ARGBTOAR30ROW_SSSE3
|
||||
#define HAS_CONVERT16TO8ROW_SSSE3
|
||||
#define HAS_CONVERT8TO16ROW_SSE2
|
||||
@ -285,7 +284,6 @@ extern "C" {
|
||||
#define HAS_RGBATOYJROW_SSSE3
|
||||
#define HAS_SPLITRGBROW_SSSE3
|
||||
#define HAS_SWAPUVROW_SSSE3
|
||||
|
||||
#endif
|
||||
|
||||
// The following are available for AVX2 gcc/clang x86 platforms:
|
||||
@ -296,6 +294,7 @@ extern "C" {
|
||||
#define HAS_ABGRTOAR30ROW_AVX2
|
||||
#define HAS_ABGRTOUVROW_AVX2
|
||||
#define HAS_ABGRTOYROW_AVX2
|
||||
#define HAS_ARGBATTENUATEROW_AVX2
|
||||
#define HAS_ARGBTOAR30ROW_AVX2
|
||||
#define HAS_ARGBTORAWROW_AVX2
|
||||
#define HAS_ARGBTORGB24ROW_AVX2
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1753
|
||||
#define LIBYUV_VERSION 1754
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -763,11 +763,11 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
|
||||
r3 = (r3 << 3) | (r3 >> 2);
|
||||
|
||||
#if LIBYUV_ARGBTOUV_PAVGB
|
||||
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
|
||||
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
|
||||
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
|
||||
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
|
||||
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
|
||||
@ -776,10 +776,10 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
#endif
|
||||
|
||||
src_rgb565 += 4;
|
||||
next_rgb565 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
src_rgb565 += 4;
|
||||
next_rgb565 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
uint8_t b0 = src_rgb565[0] & 0x1f;
|
||||
@ -847,11 +847,11 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
|
||||
r3 = (r3 << 3) | (r3 >> 2);
|
||||
|
||||
#if LIBYUV_ARGBTOUV_PAVGB
|
||||
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
|
||||
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
|
||||
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
|
||||
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
|
||||
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
|
||||
@ -860,10 +860,10 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
#endif
|
||||
|
||||
src_argb1555 += 4;
|
||||
next_argb1555 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
src_argb1555 += 4;
|
||||
next_argb1555 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
uint8_t b0 = src_argb1555[0] & 0x1f;
|
||||
@ -931,11 +931,11 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
|
||||
r3 = (r3 << 4) | r3;
|
||||
|
||||
#if LIBYUV_ARGBTOUV_PAVGB
|
||||
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
|
||||
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
|
||||
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
|
||||
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
|
||||
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
|
||||
@ -944,10 +944,10 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
#endif
|
||||
|
||||
src_argb4444 += 4;
|
||||
next_argb4444 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
src_argb4444 += 4;
|
||||
next_argb4444 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
uint8_t b0 = src_argb4444[0] & 0x0f;
|
||||
@ -2681,7 +2681,7 @@ void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
|
||||
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
|
||||
|
||||
// Blend src_argb0 over src_argb1 and store to dst_argb.
|
||||
// dst_argb may be src_argb0 or src_argb1.
|
||||
@ -2757,12 +2757,7 @@ void BlendPlaneRow_C(const uint8_t* src0,
|
||||
}
|
||||
#undef UBLEND
|
||||
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
#define ATTENUATE(f, a) (f * a + 128) >> 8
|
||||
#else
|
||||
// This code mimics the SSSE3 version for better testability.
|
||||
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
|
||||
#endif
|
||||
|
||||
// Multiply source RGB by alpha and store to destination.
|
||||
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
||||
|
||||
@ -4892,94 +4892,99 @@ void BlendPlaneRow_AVX2(const uint8_t* src0,
|
||||
#endif // HAS_BLENDPLANEROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBATTENUATEROW_SSSE3
|
||||
// Shuffle table duplicating alpha
|
||||
static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u,
|
||||
7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u};
|
||||
static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
|
||||
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u};
|
||||
// Shuffle table duplicating alpha.
|
||||
static const uvec8 kAttenShuffle = {6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
|
||||
14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u};
|
||||
|
||||
// Attenuate 4 pixels at a time.
|
||||
void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"pcmpeqb %%xmm3,%%xmm3 \n"
|
||||
"pslld $0x18,%%xmm3 \n"
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
"movdqu %3,%%xmm6 \n" // alpha shuffler
|
||||
"pcmpeqb %%xmm7,%%xmm7 \n" // 0x0080
|
||||
"psllw $0xf,%%xmm7 \n"
|
||||
"psrlw $0x8,%%xmm7 \n"
|
||||
"pcmpeqb %%xmm0,%%xmm0 \n" // 0xff000000
|
||||
"pslld $0x18,%%xmm0 \n"
|
||||
"sub %0,%1 \n"
|
||||
|
||||
// 4 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"pshufb %%xmm4,%%xmm0 \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||
"pmulhuw %%xmm1,%%xmm0 \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"punpckhbw %%xmm2,%%xmm2 \n"
|
||||
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"pand %%xmm3,%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"por %%xmm2,%%xmm0 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleAlpha0), // %3
|
||||
"m"(kShuffleAlpha1) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"pxor %%xmm4,%%xmm4 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"punpcklbw %%xmm4,%%xmm2 \n"
|
||||
"punpckhbw %%xmm4,%%xmm3 \n"
|
||||
"movdqa %%xmm2,%%xmm4 \n"
|
||||
"movdqa %%xmm3,%%xmm5 \n"
|
||||
"pshufb %%xmm6,%%xmm4 \n"
|
||||
"pshufb %%xmm6,%%xmm5 \n"
|
||||
"pmullw %%xmm4,%%xmm2 \n"
|
||||
"pmullw %%xmm5,%%xmm3 \n"
|
||||
"pand %%xmm0,%%xmm1 \n"
|
||||
"paddw %%xmm7,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"psrlw $0x8,%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm3 \n"
|
||||
"packuswb %%xmm3,%%xmm2 \n"
|
||||
"por %%xmm1,%%xmm2 \n"
|
||||
"movdqu %%xmm2,(%0,%1,1) \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kAttenShuffle) // %3
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBATTENUATEROW_SSSE3
|
||||
|
||||
#ifdef HAS_ARGBATTENUATEROW_AVX2
|
||||
// Shuffle table duplicating alpha.
|
||||
static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
|
||||
128u, 128u, 14u, 15u, 14u, 15u,
|
||||
14u, 15u, 128u, 128u};
|
||||
// Attenuate 8 pixels at a time.
|
||||
void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"vbroadcastf128 %3,%%ymm4 \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
"vpslld $0x18,%%ymm5,%%ymm5 \n"
|
||||
"vbroadcastf128 %3,%%ymm6 \n" // alpha shuffler
|
||||
"vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" // 0xff000000
|
||||
"vpsllw $0xf,%%ymm0,%%ymm7 \n" // 0x0080
|
||||
"vpslld $0x18,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x8,%%ymm7,%%ymm7 \n"
|
||||
"sub %0,%1 \n"
|
||||
|
||||
// 8 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm6 \n"
|
||||
"vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
|
||||
"vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
|
||||
"vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
|
||||
"vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
|
||||
"vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||
"vpand %%ymm5,%%ymm6,%%ymm6 \n"
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpor %%ymm6,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0,0x00(%0,%1,1) \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"vmovdqu (%0),%%ymm1 \n"
|
||||
"vpxor %%ymm3,%%ymm3,%%ymm3 \n"
|
||||
"vpunpcklbw %%ymm3,%%ymm1,%%ymm2 \n"
|
||||
"vpunpckhbw %%ymm3,%%ymm1,%%ymm3 \n"
|
||||
"vpshufb %%ymm6,%%ymm2,%%ymm4 \n"
|
||||
"vpshufb %%ymm6,%%ymm3,%%ymm5 \n"
|
||||
"vpmullw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
"vpmullw %%ymm5,%%ymm3,%%ymm3 \n"
|
||||
"vpaddw %%ymm7,%%ymm2,%%ymm2 \n"
|
||||
"vpaddw %%ymm7,%%ymm3,%%ymm3 \n"
|
||||
"vpsrlw $0x8,%%ymm2,%%ymm2 \n"
|
||||
"vpsrlw $0x8,%%ymm3,%%ymm3 \n"
|
||||
"vpackuswb %%ymm3,%%ymm2,%%ymm2 \n"
|
||||
"vpblendvb %%ymm0,%%ymm1,%%ymm2,%%ymm2 \n"
|
||||
"vmovdqu %%ymm2,(%0,%1,1) \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleAlpha_AVX2) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kAttenShuffle) // %3
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBATTENUATEROW_AVX2
|
||||
|
||||
@ -7068,7 +7073,6 @@ void HalfMergeUVRow_SSSE3(const uint8_t* src_u,
|
||||
"psrlw $0xf,%%xmm4 \n"
|
||||
"packuswb %%xmm4,%%xmm4 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
"1: \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -7111,11 +7115,10 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u,
|
||||
uint8_t* dst_uv,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 0x0101
|
||||
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
|
||||
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
"1: \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
|
||||
@ -123,8 +123,8 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
|
||||
"movi v23.8b, #255 \n" /* A */
|
||||
"1: \n"
|
||||
READYUV444
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
"subs %w4, %w4, #8 \n"
|
||||
@ -188,11 +188,11 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"ld1 {v23.8b}, [%3], #8 \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
"prfm pldl1keep, [%1, 128] \n"
|
||||
"prfm pldl1keep, [%2, 128] \n"
|
||||
"prfm pldl1keep, [%3, 448] \n"
|
||||
"subs %w5, %w5, #8 \n"
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n"
|
||||
@ -223,10 +223,10 @@ void I422ToRGBARow_NEON(const uint8_t* src_y,
|
||||
"movi v20.8b, #255 \n" /* A */
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v23, v22, v21)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
YUVTORGB(v23, v22, v21)
|
||||
"prfm pldl1keep, [%1, 128] \n"
|
||||
"prfm pldl1keep, [%2, 128] \n"
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -254,10 +254,10 @@ void I422ToRGB24Row_NEON(const uint8_t* src_y,
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%1, 128] \n"
|
||||
"prfm pldl1keep, [%2, 128] \n"
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -295,13 +295,12 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w4, %w4, #8 \n"
|
||||
ARGBTORGB565
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
"prfm pldl1keep, [%1, 128] \n"
|
||||
"prfm pldl1keep, [%2, 128] \n"
|
||||
"st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
@ -337,11 +336,11 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w4, %w4, #8 \n"
|
||||
ARGBTOARGB1555
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
"prfm pldl1keep, [%1, 128] \n"
|
||||
"prfm pldl1keep, [%2, 128] \n"
|
||||
"st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
@ -380,12 +379,12 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
ARGBTOARGB4444
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"prfm pldl1keep, [%2, 448] \n"
|
||||
"prfm pldl1keep, [%1, 128] \n"
|
||||
"prfm pldl1keep, [%2, 128] \n"
|
||||
"st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444.
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
@ -453,9 +452,9 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
|
||||
"movi v23.8b, #255 \n"
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%1, 256] \n"
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -482,9 +481,9 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
|
||||
"movi v23.8b, #255 \n"
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%1, 256] \n"
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -510,9 +509,9 @@ void NV12ToRGB24Row_NEON(const uint8_t* src_y,
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%1, 256] \n"
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -538,9 +537,9 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%1, 256] \n"
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -562,25 +561,24 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y,
|
||||
uint8_t* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile(YUVTORGB_SETUP
|
||||
"1: \n" READNV12 YUVTORGB(
|
||||
v22, v21, v20) ARGBTORGB565
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"st1 {v0.8h}, [%2], 16 \n" // store 8 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_rgb565), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG] "r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb] "r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28",
|
||||
"v29", "v30");
|
||||
asm volatile(
|
||||
YUVTORGB_SETUP "1: \n" READNV12
|
||||
"prfm pldl1keep, [%0, 448] \n" YUVTORGB(
|
||||
v22, v21, v20) ARGBTORGB565
|
||||
"prfm pldl1keep, [%1, 256] \n"
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"st1 {v0.8h}, [%2], 16 \n" // store 8 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_rgb565), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG] "r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb] "r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30");
|
||||
}
|
||||
|
||||
void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
|
||||
@ -592,8 +590,8 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
|
||||
"movi v23.8b, #255 \n"
|
||||
"1: \n"
|
||||
READYUY2
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w2, %w2, #8 \n"
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
@ -757,7 +755,6 @@ void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) {
|
||||
"1: \n"
|
||||
"subs %w1, %w1, #16 \n" // 16 bytes per loop
|
||||
"st1 {v0.16b}, [%0], #16 \n" // store
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(dst), // %0
|
||||
"+r"(width) // %1
|
||||
@ -771,7 +768,6 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
|
||||
"1: \n"
|
||||
"subs %w1, %w1, #4 \n" // 4 ints per loop
|
||||
"st1 {v0.16b}, [%0], #16 \n" // store
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(dst), // %0
|
||||
"+r"(width) // %1
|
||||
@ -1161,9 +1157,9 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w2, %w2, #16 \n" // 16 processed per loop.
|
||||
"st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_uyvy), // %0
|
||||
"+r"(dst_y), // %1
|
||||
@ -1861,10 +1857,10 @@ void RAWToUVRow_NEON(const uint8_t* src_raw,
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of rgb pixels. e.g. 16.
|
||||
void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
int src_stride_rgb565,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
const uint8_t* src_rgb565_1 = src_rgb565 + src_stride_rgb565;
|
||||
asm volatile(
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -2456,8 +2452,8 @@ void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
|
||||
"1: \n"
|
||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB.
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
||||
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
|
||||
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
||||
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
|
||||
"uxtl v1.8h, v1.8b \n"
|
||||
"uxtl v2.8h, v2.8b \n"
|
||||
"sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale
|
||||
@ -2566,19 +2562,19 @@ void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
|
||||
"1: \n"
|
||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
||||
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
|
||||
"umlal v4.8h, v1.8b, v21.8b \n" // G
|
||||
"umlal v4.8h, v2.8b, v22.8b \n" // R
|
||||
"umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G
|
||||
"umlal v5.8h, v1.8b, v25.8b \n" // G
|
||||
"umlal v5.8h, v2.8b, v26.8b \n" // R
|
||||
"umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R
|
||||
"umlal v6.8h, v1.8b, v29.8b \n" // G
|
||||
"umlal v6.8h, v2.8b, v30.8b \n" // R
|
||||
"uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B
|
||||
"uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G
|
||||
"uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R
|
||||
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
||||
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
|
||||
"umlal v4.8h, v1.8b, v21.8b \n" // G
|
||||
"umlal v4.8h, v2.8b, v22.8b \n" // R
|
||||
"umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G
|
||||
"umlal v5.8h, v1.8b, v25.8b \n" // G
|
||||
"umlal v5.8h, v2.8b, v26.8b \n" // R
|
||||
"umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R
|
||||
"umlal v6.8h, v1.8b, v29.8b \n" // G
|
||||
"umlal v6.8h, v2.8b, v30.8b \n" // R
|
||||
"uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B
|
||||
"uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G
|
||||
"uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R
|
||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels.
|
||||
"b.gt 1b \n"
|
||||
: "+r"(dst_argb), // %0
|
||||
@ -3225,8 +3221,8 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
|
||||
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
|
||||
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
|
||||
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"prfm pldl1keep, [%1, 448] \n"
|
||||
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
|
||||
|
||||
@ -4254,6 +4254,8 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
|
||||
}
|
||||
#endif // HAS_ARGBBLENDROW_SSSE3
|
||||
|
||||
// ARGBAttenuateRow disabled on win32 due to differences (off by 1) compared
|
||||
// to C and Neon. Use row_gcc.cc with clangcl.
|
||||
#ifdef HAS_ARGBATTENUATEROW_SSSE3
|
||||
// Shuffle table duplicating alpha.
|
||||
static const uvec8 kShuffleAlpha0 = {
|
||||
|
||||
@ -38,6 +38,26 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
|
||||
align_buffer_page_end(unatten_pixels, kSize);
|
||||
align_buffer_page_end(atten2_pixels, kSize);
|
||||
|
||||
// Test attenuation of fully opaque pixels (alpha = 255)
|
||||
orig_pixels[0 * 4 + 0] = 10u;
|
||||
orig_pixels[0 * 4 + 1] = 20u;
|
||||
orig_pixels[0 * 4 + 2] = 30u;
|
||||
orig_pixels[0 * 4 + 3] = 255u;
|
||||
orig_pixels[1 * 4 + 0] = 255u;
|
||||
orig_pixels[1 * 4 + 1] = 128u;
|
||||
orig_pixels[1 * 4 + 2] = 99u;
|
||||
orig_pixels[1 * 4 + 3] = 255u;
|
||||
|
||||
ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 2, 1);
|
||||
EXPECT_EQ(10u, atten_pixels[0 * 4 + 0]);
|
||||
EXPECT_EQ(20u, atten_pixels[0 * 4 + 1]);
|
||||
EXPECT_EQ(30u, atten_pixels[0 * 4 + 2]);
|
||||
EXPECT_EQ(255u, atten_pixels[0 * 4 + 3]);
|
||||
EXPECT_EQ(254u, atten_pixels[1 * 4 + 0]);
|
||||
EXPECT_EQ(128u, atten_pixels[1 * 4 + 1]);
|
||||
EXPECT_EQ(99u, atten_pixels[1 * 4 + 2]);
|
||||
EXPECT_EQ(255u, atten_pixels[1 * 4 + 3]);
|
||||
|
||||
// Test unattenuation clamps
|
||||
orig_pixels[0 * 4 + 0] = 200u;
|
||||
orig_pixels[0 * 4 + 1] = 129u;
|
||||
@ -100,9 +120,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
|
||||
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
|
||||
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
|
||||
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
|
||||
EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
|
||||
EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
|
||||
EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
|
||||
EXPECT_EQ(254, atten_pixels[255 * 4 + 0]);
|
||||
EXPECT_EQ(127, atten_pixels[255 * 4 + 1]);
|
||||
EXPECT_EQ(85, atten_pixels[255 * 4 + 2]);
|
||||
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
|
||||
|
||||
free_aligned_buffer_page_end(atten2_pixels);
|
||||
@ -1125,7 +1145,8 @@ static int TestBlend(int width,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info,
|
||||
int invert,
|
||||
int off) {
|
||||
int off,
|
||||
int attenuate) {
|
||||
if (width < 1) {
|
||||
width = 1;
|
||||
}
|
||||
@ -1139,10 +1160,12 @@ static int TestBlend(int width,
|
||||
src_argb_a[i + off] = (fastrand() & 0xff);
|
||||
src_argb_b[i + off] = (fastrand() & 0xff);
|
||||
}
|
||||
ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
|
||||
height);
|
||||
ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
|
||||
height);
|
||||
MemRandomize(src_argb_a, kStride * height + off);
|
||||
MemRandomize(src_argb_b, kStride * height + off);
|
||||
if (attenuate) {
|
||||
ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
|
||||
height);
|
||||
}
|
||||
memset(dst_argb_c, 255, kStride * height);
|
||||
memset(dst_argb_opt, 255, kStride * height);
|
||||
|
||||
@ -1172,28 +1195,35 @@ static int TestBlend(int width,
|
||||
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
|
||||
int max_diff =
|
||||
TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
|
||||
EXPECT_LE(max_diff, 1);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
|
||||
int max_diff =
|
||||
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
|
||||
EXPECT_LE(max_diff, 1);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
|
||||
int max_diff =
|
||||
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
|
||||
disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
|
||||
EXPECT_LE(max_diff, 1);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
|
||||
int max_diff =
|
||||
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0);
|
||||
EXPECT_LE(max_diff, 1);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
|
||||
int max_diff =
|
||||
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
|
||||
EXPECT_LE(max_diff, 1);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user