Fix ARM unittest failure by removing unused FloatDivToByteRow.

Apply clang-format to the jpeg if() to fix lint.
Change comments about the 4th pixel for open source compliance.
Rename UVToVU to SwapUV for consistency with MergeUV.

BUG=b/135532289, b/136515133

Change-Id: I9ce377c57b1d4d8f8b373c4cb44cd3f836300f79
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1685936
Reviewed-by: Chong Zhang <chz@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Frank Barchard authored 2019-07-02 12:46:12 -07:00, committed by Frank Barchard
parent c6dcbdfaac
commit f9aacffa02
12 changed files with 101 additions and 251 deletions

View File

@@ -376,7 +376,7 @@ extern "C" {
#define HAS_SETROW_NEON
#define HAS_SPLITRGBROW_NEON
#define HAS_SPLITUVROW_NEON
-#define HAS_UVToVUROW_NEON
+#define HAS_SWAPUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
@@ -409,7 +409,6 @@ extern "C" {
// The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-#define HAS_FLOATDIVTOBYTEROW_NEON
#define HAS_SCALESUMSAMPLES_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@@ -3372,9 +3371,9 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
-void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
-void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
-void UVToVURow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void SwapUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_C(const uint8_t* src_ayuv,
int stride_ayuv,
@@ -4018,17 +4017,6 @@ float ScaleSumSamples_NEON(const float* src,
void ScaleSamples_C(const float* src, float* dst, float scale, int width);
void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
-void FloatDivToByteRow_C(const float* src_weights,
-const float* src_values,
-uint8_t* dst_out,
-uint8_t* dst_mask,
-int width);
-void FloatDivToByteRow_NEON(const float* src_weights,
-const float* src_values,
-uint8_t* dst_out,
-uint8_t* dst_mask,
-int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@@ -69,13 +69,13 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
return FOURCC_BGRA;
}
-if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
+if (argb[3] != 255) { // Fourth byte is not Alpha of 255, so not BGRA.
return FOURCC_ARGB;
}
if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
return FOURCC_BGRA;
}
-if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
+if (argb[7] != 255) { // Second pixel fourth byte is not Alpha of 255.
return FOURCC_ARGB;
}
argb += 8;

View File

@@ -430,7 +430,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
jpeg_source_mgr* src = cinfo->src;
size_t bytes = static_cast<size_t>(num_bytes);
-if(bytes > src->bytes_in_buffer) {
+if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;
} else {

View File

@@ -516,8 +516,8 @@ int NV21ToNV12(const uint8_t* src_y,
int width,
int height) {
int y;
-void (*UVToVURow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
-UVToVURow_C;
+void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+SwapUVRow_C;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
@@ -540,11 +540,11 @@ int NV21ToNV12(const uint8_t* src_y,
src_stride_vu = dst_stride_uv = 0;
}
-#if defined(HAS_UVToVUROW_NEON)
+#if defined(HAS_SWAPUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
-UVToVURow = UVToVURow_Any_NEON;
+SwapUVRow = SwapUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
-UVToVURow = UVToVURow_NEON;
+SwapUVRow = SwapUVRow_NEON;
}
}
#endif
@@ -553,7 +553,7 @@ int NV21ToNV12(const uint8_t* src_y,
}
for (y = 0; y < halfheight; ++y) {
-UVToVURow(src_vu, dst_uv, halfwidth);
+SwapUVRow(src_vu, dst_uv, halfwidth);
src_vu += src_stride_vu;
dst_uv += dst_stride_uv;
}
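
For readers skimming the diff: NV21ToNV12 uses libyuv's usual row-dispatch pattern, sketched standalone below. MyRow_*, HAS_MYROW_NEON, and ProcessPlane are illustrative stand-ins, not names from this commit; TestCpuFlag and kCpuHasNEON are the real libyuv CPU-detection entry points.

#include <stdint.h>
#include <string.h>

// Portable fallback row; the real code starts from SwapUVRow_C the same way.
static void MyRow_C(const uint8_t* src, uint8_t* dst, int width) {
  memcpy(dst, src, width);  // placeholder per-row operation
}
#if defined(HAS_MYROW_NEON)
void MyRow_Any_NEON(const uint8_t* src, uint8_t* dst, int width);
void MyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
int TestCpuFlag(int flag);   // from libyuv/cpu_id.h
extern const int kCpuHasNEON;
#endif
#define IS_ALIGNED(v, a) (!((v) & ((a)-1)))

static void ProcessPlane(const uint8_t* src, int src_stride, uint8_t* dst,
                         int dst_stride, int width, int height) {
  // Default to C; upgrade to the any-width NEON wrapper, then to the
  // full-SIMD kernel when the width is a multiple of 16 pixels.
  void (*MyRow)(const uint8_t*, uint8_t*, int) = MyRow_C;
#if defined(HAS_MYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MyRow = MyRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MyRow = MyRow_NEON;
    }
  }
#endif
  for (int y = 0; y < height; ++y) {
    MyRow(src, dst, width);
    src += src_stride;
    dst += dst_stride;
  }
}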

View File

@@ -710,8 +710,8 @@ ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#ifdef HAS_AYUVTOYROW_NEON
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
#endif
-#ifdef HAS_AYUVTOYROW_NEON
-ANY11(UVToVURow_Any_NEON, UVToVURow_NEON, 0, 2, 2, 15)
+#ifdef HAS_SWAPUVROW_NEON
+ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
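
The ANY11 wrapper used above turns a fixed-width SIMD kernel into one that accepts any width. Roughly, ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15) expands to the sketch below (simplified: the scratch-buffer handling follows the macro's general shape, and the details here are assumptions rather than the macro's exact text):

#include <stdint.h>
#include <string.h>

void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);

void SwapUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  uint8_t temp[128 * 2];
  memset(temp, 0, sizeof(temp));  // zero padding for the tail block
  int r = width & 15;             // leftover pixels (MASK = 15)
  int n = width & ~15;            // pixels covered by whole 16-pixel blocks
  if (n > 0) {
    SwapUVRow_NEON(src_uv, dst_vu, n);   // SIMD over the aligned body
  }
  memcpy(temp, src_uv + n * 2, r * 2);   // 2 source bytes per pixel (SBPP)
  SwapUVRow_NEON(temp, temp + 128, 16);  // one padded block for the tail
  memcpy(dst_vu + n * 2, temp + 128, r * 2);  // 2 dest bytes per pixel (BPP)
}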

View File

@@ -3319,7 +3319,7 @@ void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
}
}
-void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t u = src_uv[0];
@@ -3331,19 +3331,6 @@ void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
}
}
-// divide values by weights and provide mask to indicate weight of 0.
-void FloatDivToByteRow_C(const float* src_weights,
-const float* src_values,
-uint8_t* dst_out,
-uint8_t* dst_mask,
-int width) {
-int x;
-for (x = 0; x < width; ++x) {
-dst_out[x] = Clamp(src_values[x] / src_weights[x]);
-dst_mask[x] = src_weights[x] > 0 ? 0 : 0xff;
-}
-}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
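
For reference, SwapUVRow_C (renamed above) just swaps the two bytes of each interleaved UV pair, which is the only difference between NV12's UV plane and NV21's VU plane. A self-contained sketch follows; the middle of the loop body is elided by the diff view above, so the swap and pointer-advance lines here are the assumed, obvious completion:

#include <stdint.h>
#include <stdio.h>

// Same shape as SwapUVRow_C in the hunk above: swap U and V in each
// 2-byte pixel of an interleaved chroma row.
static void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];  // assumed continuation of the loop body
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

int main(void) {
  const uint8_t uv[8] = {1, 2, 3, 4, 5, 6, 7, 8};  // U,V byte pairs
  uint8_t vu[8];
  SwapUVRow_C(uv, vu, 4);  // 4 UV pixels
  for (int i = 0; i < 8; ++i) {
    printf("%d ", vu[i]);  // prints: 2 1 4 3 6 5 8 7
  }
  printf("\n");
  return 0;
}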

View File

@@ -6120,24 +6120,24 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y,
int width) {
asm volatile(
-"sub %1,%2 \n"
+"sub %1,%2 \n"
LABELALIGN
-"1: \n"
-"movq (%1),%%xmm2 \n"
-"movq 0x00(%1,%2,1),%%xmm1 \n"
-"add $0x8,%1 \n"
-"punpcklbw %%xmm1,%%xmm2 \n"
-"movdqu (%0),%%xmm0 \n"
-"add $0x10,%0 \n"
-"movdqa %%xmm0,%%xmm1 \n"
-"punpcklbw %%xmm2,%%xmm0 \n"
-"punpckhbw %%xmm2,%%xmm1 \n"
-"movdqu %%xmm0,(%3) \n"
-"movdqu %%xmm1,0x10(%3) \n"
-"lea 0x20(%3),%3 \n"
-"sub $0x10,%4 \n"
-"jg 1b \n"
+"1: \n"
+"movq (%1),%%xmm2 \n"
+"movq 0x00(%1,%2,1),%%xmm1 \n"
+"add $0x8,%1 \n"
+"punpcklbw %%xmm1,%%xmm2 \n"
+"movdqu (%0),%%xmm0 \n"
+"add $0x10,%0 \n"
+"movdqa %%xmm0,%%xmm1 \n"
+"punpcklbw %%xmm2,%%xmm0 \n"
+"punpckhbw %%xmm2,%%xmm1 \n"
+"movdqu %%xmm0,(%3) \n"
+"movdqu %%xmm1,0x10(%3) \n"
+"lea 0x20(%3),%3 \n"
+"sub $0x10,%4 \n"
+"jg 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6156,24 +6156,24 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y,
int width) {
asm volatile(
-"sub %1,%2 \n"
+"sub %1,%2 \n"
LABELALIGN
-"1: \n"
-"movq (%1),%%xmm2 \n"
-"movq 0x00(%1,%2,1),%%xmm1 \n"
-"add $0x8,%1 \n"
-"punpcklbw %%xmm1,%%xmm2 \n"
-"movdqu (%0),%%xmm0 \n"
-"movdqa %%xmm2,%%xmm1 \n"
-"add $0x10,%0 \n"
-"punpcklbw %%xmm0,%%xmm1 \n"
-"punpckhbw %%xmm0,%%xmm2 \n"
-"movdqu %%xmm1,(%3) \n"
-"movdqu %%xmm2,0x10(%3) \n"
-"lea 0x20(%3),%3 \n"
-"sub $0x10,%4 \n"
-"jg 1b \n"
+"1: \n"
+"movq (%1),%%xmm2 \n"
+"movq 0x00(%1,%2,1),%%xmm1 \n"
+"add $0x8,%1 \n"
+"punpcklbw %%xmm1,%%xmm2 \n"
+"movdqu (%0),%%xmm0 \n"
+"movdqa %%xmm2,%%xmm1 \n"
+"add $0x10,%0 \n"
+"punpcklbw %%xmm0,%%xmm1 \n"
+"punpckhbw %%xmm0,%%xmm2 \n"
+"movdqu %%xmm1,(%3) \n"
+"movdqu %%xmm2,0x10(%3) \n"
+"lea 0x20(%3),%3 \n"
+"sub $0x10,%4 \n"
+"jg 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6192,27 +6192,27 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y,
int width) {
asm volatile(
-"sub %1,%2 \n"
+"sub %1,%2 \n"
LABELALIGN
-"1: \n"
-"vpmovzxbw (%1),%%ymm1 \n"
-"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
-"add $0x10,%1 \n"
-"vpsllw $0x8,%%ymm2,%%ymm2 \n"
-"vpor %%ymm1,%%ymm2,%%ymm2 \n"
-"vmovdqu (%0),%%ymm0 \n"
-"add $0x20,%0 \n"
-"vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
-"vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
-"vextractf128 $0x0,%%ymm1,(%3) \n"
-"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
-"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
-"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
-"lea 0x40(%3),%3 \n"
-"sub $0x20,%4 \n"
-"jg 1b \n"
-"vzeroupper \n"
+"1: \n"
+"vpmovzxbw (%1),%%ymm1 \n"
+"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
+"add $0x10,%1 \n"
+"vpsllw $0x8,%%ymm2,%%ymm2 \n"
+"vpor %%ymm1,%%ymm2,%%ymm2 \n"
+"vmovdqu (%0),%%ymm0 \n"
+"add $0x20,%0 \n"
+"vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
+"vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
+"vextractf128 $0x0,%%ymm1,(%3) \n"
+"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
+"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
+"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
+"lea 0x40(%3),%3 \n"
+"sub $0x20,%4 \n"
+"jg 1b \n"
+"vzeroupper \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6231,27 +6231,27 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y,
int width) {
asm volatile(
-"sub %1,%2 \n"
+"sub %1,%2 \n"
LABELALIGN
-"1: \n"
-"vpmovzxbw (%1),%%ymm1 \n"
-"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
-"add $0x10,%1 \n"
-"vpsllw $0x8,%%ymm2,%%ymm2 \n"
-"vpor %%ymm1,%%ymm2,%%ymm2 \n"
-"vmovdqu (%0),%%ymm0 \n"
-"add $0x20,%0 \n"
-"vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
-"vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
-"vextractf128 $0x0,%%ymm1,(%3) \n"
-"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
-"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
-"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
-"lea 0x40(%3),%3 \n"
-"sub $0x20,%4 \n"
-"jg 1b \n"
-"vzeroupper \n"
+"1: \n"
+"vpmovzxbw (%1),%%ymm1 \n"
+"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
+"add $0x10,%1 \n"
+"vpsllw $0x8,%%ymm2,%%ymm2 \n"
+"vpor %%ymm1,%%ymm2,%%ymm2 \n"
+"vmovdqu (%0),%%ymm0 \n"
+"add $0x20,%0 \n"
+"vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
+"vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
+"vextractf128 $0x0,%%ymm1,(%3) \n"
+"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
+"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
+"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
+"lea 0x40(%3),%3 \n"
+"sub $0x20,%4 \n"
+"jg 1b \n"
+"vzeroupper \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2

View File

@@ -2769,7 +2769,7 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
uint8_t* dst_yuv24,
int width) {
asm volatile(
-"1: \n"
+"1: \n"
"vld1.8 {q2}, [%0]! \n" // load 16 Y values
"vld2.8 {d0, d2}, [%1]! \n" // load 8 VU values
"vmov d1, d0 \n"
@@ -2854,7 +2854,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
// Similar to ARGBExtractAlphaRow_NEON
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
asm volatile(
-"1: \n"
+"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV pixels
"subs %2, %2, #16 \n" // 16 processed per loop
@@ -2868,9 +2868,9 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
}
// Convert biplanar UV channel of NV12 to NV21
-void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
-"1: \n"
+"1: \n"
"vld2.8 {d0, d2}, [%0]! \n" // load 16 UV values
"vld2.8 {d1, d3}, [%0]! \n"
"vorr.u8 q2, q0, q0 \n" // move U after V

View File

@@ -2882,7 +2882,7 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
uint8_t* dst_yuv24,
int width) {
asm volatile(
-"1: \n"
+"1: \n"
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
@@ -2905,9 +2905,8 @@ void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
asm volatile(
-"1: \n"
-"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
-// pixels.
+"1: \n"
+"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 ayuv
"uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts.
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
@@ -2933,7 +2932,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
asm volatile(
-"1: \n"
+"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
// pixels.
"uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
@@ -2957,7 +2956,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
// Copy row of AYUV Y's into Y
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
asm volatile(
-"1: \n"
+"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
// pixels
"subs %w2, %w2, #16 \n" // 16 pixels per loop
@@ -2970,52 +2969,10 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3");
}
-void FloatDivToByteRow_NEON(const float* src_weights,
-const float* src_values,
-uint8_t* dst_out,
-uint8_t* dst_mask,
-int width) {
-asm volatile(
-"movi v0.4s, #0 \n"
-"1: \n"
-"ld1 {v1.4s,v2.4s}, [%0], #32 \n" // load 8 float weights
-"ld1 {v3.4s,v4.4s}, [%1], #32 \n" // load 8 float values
-"subs %w4, %w4, #8 \n" // 8 pixels per loop
-"fdiv v1.4s, v3.4s, v1.4s \n" // values / weights
-"fdiv v2.4s, v4.4s, v2.4s \n"
-"fcvtas v1.4s, v1.4s \n" // float to int
-"fcvtas v2.4s, v2.4s \n" // float to int
-"uqxtn v1.4h, v1.4s \n" // 8 shorts
-"uqxtn2 v1.8h, v2.4s \n"
-"uqxtn v1.8b, v1.8h \n" // 8 bytes
-"st1 {v1.8b}, [%2], #8 \n" // store 8 byte out
-"fcmgt v5.4s, v1.4s, v0.4s \n" // cmp weight to zero
-"fcmgt v6.4s, v2.4s, v0.4s \n"
-"uqxtn v5.4h, v5.4s \n" // 8 shorts
-"uqxtn2 v5.8h, v6.4s \n"
-"uqxtn v5.8b, v1.8h \n" // 8 bytes
-"st1 {v5.8b}, [%3], #8 \n" // store 8 byte mask
-"b.gt 1b \n"
-: "+r"(src_weights), // %0
-"+r"(src_values), // %1
-"+r"(dst_out), // %2
-"+r"(dst_mask), // %3
-"+r"(width) // %4
-:
-: "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
-}
// Convert biplanar UV channel of NV12 to NV21
-void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
-"1: \n"
+"1: \n"
"ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 UV values
"orr v2.16b, v0.16b, v0.16b \n" // move U after V
"subs %w2, %w2, #16 \n" // 16 pixels per loop

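The AArch64 SwapUVRow_NEON above leans on ld2/st2 to deinterleave and reinterleave bytes. A hedged intrinsics equivalent follows (not from this commit; assumes width is a multiple of 16, matching the IS_ALIGNED(halfwidth, 16) guard in NV21ToNV12):

#include <arm_neon.h>
#include <stdint.h>

void SwapUVRow_NEON_intrinsics(const uint8_t* src_uv, uint8_t* dst_vu,
                               int width) {
  for (int x = 0; x < width; x += 16) {
    uint8x16x2_t uv = vld2q_u8(src_uv + 2 * x);  // val[0] = U, val[1] = V
    uint8x16x2_t vu;
    vu.val[0] = uv.val[1];  // V first
    vu.val[1] = uv.val[0];  // then U
    vst2q_u8(dst_vu + 2 * x, vu);  // st2 re-interleaves as V,U,V,U,...
  }
}
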
View File

@@ -5450,7 +5450,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
// 1 pixel loop
l1:
-movd xmm2, dword ptr [eax] // 1 argb pixel, 4 bytes.
+movd xmm2, dword ptr [eax] // 1 argb pixel
lea eax, [eax + 4]
punpcklbw xmm2, xmm1
punpcklwd xmm2, xmm1

View File

@@ -993,8 +993,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
-// TODO(fbarchard): Investigate J420 error of 11 on Windows.
-TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 11)
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 4)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)

View File

@@ -3269,88 +3269,7 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
-float TestFloatDivToByte(int benchmark_width,
-int benchmark_height,
-int benchmark_iterations,
-float scale,
-bool opt) {
-int i, j;
-// NEON does multiple of 8, so round count up
-const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
-align_buffer_page_end(src_weights, kPixels * 4);
-align_buffer_page_end(src_values, kPixels * 4);
-align_buffer_page_end(dst_out_c, kPixels);
-align_buffer_page_end(dst_out_opt, kPixels);
-align_buffer_page_end(dst_mask_c, kPixels);
-align_buffer_page_end(dst_mask_opt, kPixels);
-// Randomize works but may contain some denormals affecting performance.
-// MemRandomize(orig_y, kPixels * 4);
-// large values are problematic. audio is really -1 to 1.
-for (i = 0; i < kPixels; ++i) {
-(reinterpret_cast<float*>(src_weights))[i] = scale;
-(reinterpret_cast<float*>(src_values))[i] =
-sinf(static_cast<float>(i) * 0.1f);
-}
-memset(dst_out_c, 0, kPixels);
-memset(dst_out_opt, 1, kPixels);
-memset(dst_mask_c, 2, kPixels);
-memset(dst_mask_opt, 3, kPixels);
-FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
-reinterpret_cast<float*>(src_values), dst_out_c,
-dst_mask_c, kPixels);
-for (j = 0; j < benchmark_iterations; j++) {
-if (opt) {
-#ifdef HAS_FLOATDIVTOBYTEROW_NEON
-FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
-reinterpret_cast<float*>(src_values), dst_out_opt,
-dst_mask_opt, kPixels);
-#else
-FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
-reinterpret_cast<float*>(src_values), dst_out_opt,
-dst_mask_opt, kPixels);
-#endif
-} else {
-FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
-reinterpret_cast<float*>(src_values), dst_out_opt,
-dst_mask_opt, kPixels);
-}
-}
-uint8_t max_diff = 0;
-for (i = 0; i < kPixels; ++i) {
-uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
-abs(dst_mask_c[i] - dst_mask_opt[i]);
-if (abs_diff > max_diff) {
-max_diff = abs_diff;
-}
-}
-free_aligned_buffer_page_end(src_weights);
-free_aligned_buffer_page_end(src_values);
-free_aligned_buffer_page_end(dst_out_c);
-free_aligned_buffer_page_end(dst_out_opt);
-free_aligned_buffer_page_end(dst_mask_c);
-free_aligned_buffer_page_end(dst_mask_opt);
-return max_diff;
-}
-TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
-float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
-benchmark_iterations_, 1.2f, false);
-EXPECT_EQ(0, diff);
-}
-TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
-float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
-benchmark_iterations_, 1.2f, true);
-EXPECT_EQ(0, diff);
-}
-TEST_F(LibYUVPlanarTest, UVToVURow) {
+TEST_F(LibYUVPlanarTest, SwapUVRow) {
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_vu, kPixels * 2);
align_buffer_page_end(dst_pixels_uv, kPixels * 2);
@@ -3358,7 +3277,7 @@ TEST_F(LibYUVPlanarTest, UVToVURow) {
MemRandomize(src_pixels_vu, kPixels * 2);
memset(dst_pixels_uv, 1, kPixels * 2);
-UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);
+SwapUVRow_C(src_pixels_vu, dst_pixels_uv, kPixels);
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);