Add RAWToJ420

Add J420 output from RAW.
Optimize RGB24 and RAW to J420 on ARM by using NEON for both steps of the 2-step conversion (RGB24/RAW to ARGB, then ARGB to J420).

Also fix a sign-compare warning that was breaking the Windows build.

Bug: libyuv:887, b/183534734
Change-Id: I8c39334552dc0b28414e638708db413d6adf8d6e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2783382
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2021-03-23 15:54:02 -07:00 committed by Frank Barchard
parent b046131c0b
commit d8f1bfc981
10 changed files with 344 additions and 101 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1781
Version: 1782
License: BSD
License File: LICENSE

View File

@ -693,6 +693,19 @@ int RAWToI420(const uint8_t* src_raw,
int width,
int height);
// RGB big endian (rgb in memory) to J420.
//
// src_raw / src_stride_raw: packed source image and its row stride in bytes.
// dst_y / dst_stride_y:     destination Y plane; one row is written per
//                           source row.
// dst_u / dst_stride_u:     destination U plane; one row is written per pair
//                           of source rows (plus one for a trailing odd row).
// dst_v / dst_stride_v:     destination V plane, laid out like the U plane.
// width:                    image width in pixels; must be > 0.
// height:                   image height in rows; must be non-zero. A negative
//                           height converts the source bottom-up (inverted).
//
// Returns 0 on success and a non-zero value on failure (-1 when a pointer is
// NULL, width <= 0 or height == 0).
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1781
#define LIBYUV_VERSION 1782
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -1368,6 +1368,18 @@ int ARGBToI420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@ -1388,22 +1400,6 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
@ -1771,7 +1767,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYROW_NEON)
#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
RGB24ToYRow = RGB24ToYRow_Any_NEON;
@ -1808,6 +1804,14 @@ int RGB24ToI420(const uint8_t* src_rgb24,
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
#if defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@ -1816,6 +1820,18 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@ -1960,6 +1976,14 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
#endif
#else
#if defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@ -1968,6 +1992,18 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
@ -2111,6 +2147,26 @@ int RAWToI420(const uint8_t* src_raw,
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else
#if defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@ -2186,6 +2242,178 @@ int RAWToI420(const uint8_t* src_raw,
return 0;
}
// TODO(fbarchard): Use Matrix version to implement I420 and J420.
// Convert RAW to J420.
//
// Two implementations are selected at compile time:
//  * Direct: when RAW->YJ / RAW->UVJ row functions exist for the target
//    (NEON with both Y and UV variants, or MSA / MMI), rows are converted
//    straight from RAW.
//  * Two-step: otherwise each pair of RAW rows is first expanded to ARGB in a
//    temporary buffer, and the ARGB rows are then converted to YJ / UVJ.
//
// Parameters:
//   src_raw, src_stride_raw  packed source image and its row stride in bytes.
//   dst_y, dst_stride_y      destination Y plane (one row per source row).
//   dst_u, dst_stride_u      destination U plane (one row per two source rows).
//   dst_v, dst_stride_v      destination V plane (one row per two source rows).
//   width                    width in pixels, must be > 0.
//   height                   height in rows, must be != 0; a negative value
//                            reads the source bottom-up (inverted image).
//
// Returns 0 on success, -1 on invalid arguments, and 1 if the temporary ARGB
// row buffer needed by the two-step path could not be allocated.
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw,
              int src_stride_raw,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int width,
              int height) {
  int y;
#if (defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
    defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)
  // Direct path: row functions that read RAW and write YJ / UVJ.
  void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
                      uint8_t* dst_u, uint8_t* dst_v, int width) =
      RAWToUVJRow_C;
  void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
      RAWToYJRow_C;
#else
  // Two-step path: RAW -> ARGB, then ARGB -> YJ / UVJ.
  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
      RAWToARGBRow_C;
  void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
                       uint8_t* dst_u, uint8_t* dst_v, int width) =
      ARGBToUVJRow_C;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
      ARGBToYJRow_C;
#endif
  if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }

  // Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // Start with the any-width variants; upgrade when width is a multiple of
    // the kernel's step (8 for Y, 16 for UV).
    RAWToUVJRow = RAWToUVJRow_Any_NEON;
    RAWToYJRow = RAWToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToYJRow = RAWToYJRow_NEON;
      if (IS_ALIGNED(width, 16)) {
        RAWToUVJRow = RAWToUVJRow_NEON;
      }
    }
  }
  // MMI and MSA version does direct RAW to YUV.
#elif (defined(HAS_RAWTOYJROW_MMI) || defined(HAS_RAWTOYJROW_MSA))
#if defined(HAS_RAWTOYJROW_MMI) && defined(HAS_RAWTOUVJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    RAWToUVJRow = RAWToUVJRow_Any_MMI;
    RAWToYJRow = RAWToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      RAWToYJRow = RAWToYJRow_MMI;
      if (IS_ALIGNED(width, 16)) {
        RAWToUVJRow = RAWToUVJRow_MMI;
      }
    }
  }
#endif
#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToUVJRow = RAWToUVJRow_Any_MSA;
    RAWToYJRow = RAWToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToYJRow = RAWToYJRow_MSA;
      RAWToUVJRow = RAWToUVJRow_MSA;
    }
  }
#endif
#else
  // Other platforms do an intermediate conversion from RAW to ARGB.
#if defined(HAS_RAWTOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToARGBRow = RAWToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToARGBRow = RAWToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
      if (IS_ALIGNED(width, 16)) {
        ARGBToUVJRow = ARGBToUVJRow_NEON;
      }
    }
  }
#endif
#if defined(HAS_RAWTOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RAWToARGBRow = RAWToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVJRow = ARGBToUVJRow_SSSE3;
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
  // Checked after SSSE3 so that AVX2 wins when both are available.
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToUVJRow = ARGBToUVJRow_AVX2;
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#endif

  {
#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
      defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
    // Allocate 2 rows of ARGB. Each row is width * 4 bytes rounded up to a
    // multiple of 32 so the second row starts on a 32-byte boundary.
    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
    // Bail out instead of writing through a null pointer when the allocation
    // fails. NOTE(review): relies on align_buffer_64 leaving `row` NULL when
    // the underlying allocation returns NULL - confirm against its definition.
    if (!row) {
      return 1;
    }
#endif

    // Process two source rows per iteration: one U/V row and two Y rows.
    for (y = 0; y < height - 1; y += 2) {
#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
     defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
      RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width);
      RAWToYJRow(src_raw, dst_y, width);
      RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
      RAWToARGBRow(src_raw, row, width);
      RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
      ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
      ARGBToYJRow(row, dst_y, width);
      ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
      src_raw += src_stride_raw * 2;
      dst_y += dst_stride_y * 2;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    // Odd height: convert the final row on its own, passing a stride of 0 to
    // the UV row function because there is no second row to pair it with.
    if (height & 1) {
#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
     defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
      RAWToUVJRow(src_raw, 0, dst_u, dst_v, width);
      RAWToYJRow(src_raw, dst_y, width);
#else
      RAWToARGBRow(src_raw, row, width);
      ARGBToUVJRow(row, 0, dst_u, dst_v, width);
      ARGBToYJRow(row, dst_y, width);
#endif
    }
#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
      defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
    free_aligned_buffer_64(row);
#endif
  }
  return 0;
}
// Convert RGB565 to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,

View File

@ -3376,14 +3376,14 @@ int AR30ToAB30(const uint8_t* src_ar30,
// Convert AR64 to ARGB.
LIBYUV_API
int AR64ToARGB(const uint16_t* src_ar64,
int src_stride_ar64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int src_stride_ar64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
void (*AR64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb,
int width) = AR64ToARGBRow_C;
int width) = AR64ToARGBRow_C;
if (!src_ar64 || !dst_argb || width <= 0 || height == 0) {
return -1;
}

View File

@ -1116,8 +1116,7 @@ void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
asm volatile(
"movdqa %3,%%xmm2 \n"
"movdqa %4,%%xmm3 \n"
LABELALIGN
"movdqa %4,%%xmm3 \n" LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
@ -1129,11 +1128,11 @@ void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
"lea 0x20(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToAB64Lo), // %3
"m"(kShuffleARGBToAB64Hi) // %4
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToAB64Lo), // %3
"m"(kShuffleARGBToAB64Hi) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
@ -1166,8 +1165,7 @@ void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
int width) {
asm volatile(
"movdqa %3,%%xmm2 \n"
LABELALIGN
"movdqa %3,%%xmm2 \n" LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n"
@ -1220,8 +1218,7 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
asm volatile(
"vbroadcastf128 %3,%%ymm2 \n"
"vbroadcastf128 %4,%%ymm3 \n"
LABELALIGN
"vbroadcastf128 %4,%%ymm3 \n" LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
@ -1233,11 +1230,11 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToAB64Lo), // %3
"m"(kShuffleARGBToAB64Hi) // %3
"m"(kShuffleARGBToAB64Hi) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif
@ -1275,8 +1272,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm2 \n"
LABELALIGN
"vbroadcastf128 %3,%%ymm2 \n" LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"

View File

@ -2191,7 +2191,7 @@ void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
: "cc", "memory", "q0", "q1", "q2", "q3");
}
static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
uint8_t* dst_argb,
@ -2362,9 +2362,9 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q4, d0, d4 \n" // B
"vmull.u8 q4, d0, d4 \n" // R
"vmlal.u8 q4, d1, d5 \n" // G
"vmlal.u8 q4, d2, d6 \n" // R
"vmlal.u8 q4, d2, d6 \n" // B
"vqrshrn.u16 d0, q4, #8 \n" // 16 bit to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"

View File

@ -1628,10 +1628,10 @@ void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"stp q0, q2, [%1], #32 \n" // store 8 pixels
"b.gt 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleAR64ToARGB) // %3
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleAR64ToARGB) // %3
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
}
@ -2506,9 +2506,9 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
"prfm pldl1keep, [%0, 448] \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // B
"umull v0.8h, v0.8b, v4.8b \n" // R
"umlal v0.8h, v1.8b, v5.8b \n" // G
"umlal v0.8h, v2.8b, v6.8b \n" // R
"umlal v0.8h, v2.8b, v6.8b \n" // B
"uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"b.gt 1b \n"

View File

@ -1140,6 +1140,7 @@ TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
@ -1226,10 +1227,11 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, \
kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB * sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideB* kHeightB); \
@ -1242,7 +1244,7 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \
(TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
@ -1250,40 +1252,41 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (fastrand() & 63) + 1; \
const int kHeight = (fastrand() & 31) + 1; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA* kHeightA * sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB* kHeightB * sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
src_argb[i] = 0xfe; \
} \
memset(dst_argb_c, 123, kStrideB* kHeightB); \
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
kStrideB, kWidth, kHeight); \
for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
} \
// Defines the gtest case LibYUVConvertTest.<FMT_A>To<FMT_B>_Random.
// For each of benchmark_iterations_ rounds it picks a width in [1, 64] and a
// height in [1, 32], rounds heights and strides up to the HEIGHT_* / STRIDE_*
// multiples, fills the source with 0xfe and both destinations with 123, then
// runs FMT_A##To##FMT_B once with CPU flags masked to disable_cpu_flags_ and
// once with benchmark_cpu_info_, and expects the two outputs to match byte for
// byte. The (int) casts on sizeof keep the size arithmetic and loop bounds
// signed, avoiding signed/unsigned comparison warnings.
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (fastrand() & 63) + 1; \
const int kHeight = (fastrand() & 31) + 1; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_c, \
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i] = 0xfe; \
} \
memset(dst_argb_c, 123, kStrideB* kHeightB); \
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
kStrideB, kWidth, kHeight); \
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
} \
}
#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
@ -1464,10 +1467,11 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
const int kStrideA = \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
align_buffer_page_end(src_argb, \
kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA * sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA * sizeof(TYPE_A)); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_opt, \
kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideA* kHeightA); \
@ -1486,7 +1490,7 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
MaskCpuFlags(benchmark_cpu_info_); \
FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
kWidth, NEG kHeight); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \

View File

@ -252,13 +252,13 @@ static int ARGBClipTestFilter(int src_width,
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
#ifdef ENABLE_SLOW_TESTS
#define TEST_FACTOR(name, nom, denom) \
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(, name, None, nom, denom, 0) \
TEST_FACTOR1(, name, Linear, nom, denom, 3) \
TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
TEST_FACTOR1(, name, Box, nom, denom, 3)
#else
#define TEST_FACTOR(name, nom, denom) \
#define TEST_FACTOR(name, nom, denom) \
TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
@ -290,13 +290,15 @@ TEST_FACTOR(3, 1, 3)
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##name##ClipTo##width##x##height##_##filter) { \
TEST_F(LibYUVScaleTest, \
DISABLED_##name##ClipTo##width##x##height##_##filter) { \
int diff = \
ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##name##ClipFrom##width##x##height##_##filter) { \
TEST_F(LibYUVScaleTest, \
DISABLED_##name##ClipFrom##width##x##height##_##filter) { \
int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_); \
@ -305,12 +307,12 @@ TEST_FACTOR(3, 1, 3)
/// Test scale to a specified size with all 4 filters.
#ifdef ENABLE_SLOW_TESTS
#define TEST_SCALETO(name, width, height) \
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(, name, width, height, None, 0) \
TEST_SCALETO1(, name, width, height, Linear, 3) \
TEST_SCALETO1(, name, width, height, Bilinear, 3)
#else
#define TEST_SCALETO(name, width, height) \
#define TEST_SCALETO(name, width, height) \
TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)