diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 9bb488506..5cbdaadee 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -376,7 +376,7 @@ extern "C" { #define HAS_SETROW_NEON #define HAS_SPLITRGBROW_NEON #define HAS_SPLITUVROW_NEON -#define HAS_UVToVUROW_NEON +#define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON @@ -409,7 +409,6 @@ extern "C" { // The following are available on AArch64 platforms: #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) -#define HAS_FLOATDIVTOBYTEROW_NEON #define HAS_SCALESUMSAMPLES_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) @@ -3372,9 +3371,9 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width); -void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); -void UVToVURow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width); void AYUVToUVRow_C(const uint8_t* src_ayuv, int stride_ayuv, @@ -4018,17 +4017,6 @@ float ScaleSumSamples_NEON(const float* src, void ScaleSamples_C(const float* src, float* dst, float scale, int width); void ScaleSamples_NEON(const float* src, float* dst, float scale, int width); -void FloatDivToByteRow_C(const float* src_weights, - const float* src_values, - uint8_t* dst_out, - uint8_t* dst_mask, - int width); -void FloatDivToByteRow_NEON(const float* src_weights, - const float* src_values, - uint8_t* dst_out, - uint8_t* dst_mask, - int width); - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/compare.cc b/source/compare.cc index 5aa3a4db8..7f4828104 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -69,13 +69,13 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) { if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. return FOURCC_BGRA; } - if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. + if (argb[3] != 255) { // Fourth byte is not Alpha of 255, so not BGRA. return FOURCC_ARGB; } if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255. return FOURCC_BGRA; } - if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255. + if (argb[7] != 255) { // Second pixel fourth byte is not Alpha of 255. 
return FOURCC_ARGB; } argb += 8; diff --git a/source/mjpeg_decoder.cc index 5c5e5eadf..80e381dd6 100644 --- a/source/mjpeg_decoder.cc +++ b/source/mjpeg_decoder.cc @@ -430,7 +430,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) { void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT jpeg_source_mgr* src = cinfo->src; size_t bytes = static_cast<size_t>(num_bytes); - if(bytes > src->bytes_in_buffer) { + if (bytes > src->bytes_in_buffer) { src->next_input_byte = nullptr; src->bytes_in_buffer = 0; } else { diff --git a/source/planar_functions.cc index 9cab230f3..59e687afd 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -516,8 +516,8 @@ int NV21ToNV12(const uint8_t* src_y, int width, int height) { int y; - void (*UVToVURow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = - UVToVURow_C; + void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = + SwapUVRow_C; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; @@ -540,11 +540,11 @@ int NV21ToNV12(const uint8_t* src_y, src_stride_vu = dst_stride_uv = 0; } -#if defined(HAS_UVToVUROW_NEON) +#if defined(HAS_SWAPUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - UVToVURow = UVToVURow_Any_NEON; + SwapUVRow = SwapUVRow_Any_NEON; if (IS_ALIGNED(halfwidth, 16)) { - UVToVURow = UVToVURow_NEON; + SwapUVRow = SwapUVRow_NEON; } } #endif @@ -553,7 +553,7 @@ int NV21ToNV12(const uint8_t* src_y, } for (y = 0; y < halfheight; ++y) { - UVToVURow(src_vu, dst_uv, halfwidth); + SwapUVRow(src_vu, dst_uv, halfwidth); src_vu += src_stride_vu; dst_uv += dst_stride_uv; } diff --git a/source/row_any.cc index 06ca723a2..ef89350ec 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -710,8 +710,8 @@ ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15) #ifdef HAS_AYUVTOYROW_NEON ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15) #endif -#ifdef HAS_AYUVTOYROW_NEON -ANY11(UVToVURow_Any_NEON, UVToVURow_NEON, 0, 2, 2, 15) +#ifdef HAS_SWAPUVROW_NEON +ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15) #endif #ifdef HAS_RGB24TOARGBROW_NEON ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) #endif diff --git a/source/row_common.cc index 8951d0037..8ef1b1c53 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -3319,7 +3319,7 @@ void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { } } -void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) { +void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) { int x; for (x = 0; x < width; ++x) { uint8_t u = src_uv[0]; @@ -3331,19 +3331,6 @@ void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) { } } -// divide values by weights and provide mask to indicate weight of 0. -void FloatDivToByteRow_C(const float* src_weights, - const float* src_values, - uint8_t* dst_out, - uint8_t* dst_mask, - int width) { - int x; - for (x = 0; x < width; ++x) { - dst_out[x] = Clamp(src_values[x] / src_weights[x]); - dst_mask[x] = src_weights[x] > 0 ? 
0 : 0xff; - } -} - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_gcc.cc b/source/row_gcc.cc index decd3d2e4..18b6350b8 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -6120,24 +6120,24 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "movq (%1),%%xmm2 \n" - "movq 0x00(%1,%2,1),%%xmm1 \n" - "add $0x8,%1 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqu (%0),%%xmm0 \n" - "add $0x10,%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqu %%xmm0,(%3) \n" - "movdqu %%xmm1,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "add $0x10,%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%3) \n" + "movdqu %%xmm1,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6156,24 +6156,24 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "movq (%1),%%xmm2 \n" - "movq 0x00(%1,%2,1),%%xmm1 \n" - "add $0x8,%1 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqu (%0),%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "add $0x10,%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - "movdqu %%xmm1,(%3) \n" - "movdqu %%xmm2,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "add $0x10,%0 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,(%3) \n" + "movdqu %%xmm2,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6192,27 +6192,27 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%1),%%ymm1 \n" - "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" - "add $0x10,%1 \n" - "vpsllw $0x8,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm2,%%ymm2 \n" - "vmovdqu (%0),%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" - "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" - "vextractf128 $0x0,%%ymm1,(%3) \n" - "vextractf128 $0x0,%%ymm2,0x10(%3) \n" - "vextractf128 $0x1,%%ymm1,0x20(%3) \n" - "vextractf128 $0x1,%%ymm2,0x30(%3) \n" - "lea 0x40(%3),%3 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -6231,27 +6231,27 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y, int width) { asm volatile( - "sub %1,%2 \n" + "sub %1,%2 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%1),%%ymm1 \n" - "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" - "add $0x10,%1 \n" 
- "vpsllw $0x8,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm2,%%ymm2 \n" - "vmovdqu (%0),%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" - "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" - "vextractf128 $0x0,%%ymm1,(%3) \n" - "vextractf128 $0x0,%%ymm2,0x10(%3) \n" - "vextractf128 $0x1,%%ymm1,0x20(%3) \n" - "vextractf128 $0x1,%%ymm2,0x30(%3) \n" - "lea 0x40(%3),%3 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" + "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 diff --git a/source/row_neon.cc b/source/row_neon.cc index a12fa790d..09e1af11b 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -2769,7 +2769,7 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y, uint8_t* dst_yuv24, int width) { asm volatile( - "1: \n" + "1: \n" "vld1.8 {q2}, [%0]! \n" // load 16 Y values "vld2.8 {d0, d2}, [%1]! \n" // load 8 VU values "vmov d1, d0 \n" @@ -2854,7 +2854,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv, // Similar to ARGBExtractAlphaRow_NEON void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { asm volatile( - "1: \n" + "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV pixels "subs %2, %2, #16 \n" // 16 processed per loop @@ -2868,9 +2868,9 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { } // Convert biplanar UV channel of NV12 to NV21 -void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) { +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) { asm volatile( - "1: \n" + "1: \n" "vld2.8 {d0, d2}, [%0]! \n" // load 16 UV values "vld2.8 {d1, d3}, [%0]! \n" "vorr.u8 q2, q0, q0 \n" // move U after V diff --git a/source/row_neon64.cc b/source/row_neon64.cc index f5cbb4701..7314282b7 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2882,7 +2882,7 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y, uint8_t* dst_yuv24, int width) { asm volatile( - "1: \n" + "1: \n" "ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values "ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values "zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values @@ -2905,9 +2905,8 @@ void AYUVToUVRow_NEON(const uint8_t* src_ayuv, const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv; asm volatile( - "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 - // pixels. + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 ayuv "uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 @@ -2933,7 +2932,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv, const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv; asm volatile( - "1: \n" + "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 // pixels. "uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts. 
@@ -2957,7 +2956,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv, // Copy row of AYUV Y's into Y void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { asm volatile( - "1: \n" + "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 // pixels "subs %w2, %w2, #16 \n" // 16 pixels per loop @@ -2970,52 +2969,10 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3"); } -void FloatDivToByteRow_NEON(const float* src_weights, - const float* src_values, - uint8_t* dst_out, - uint8_t* dst_mask, - int width) { - asm volatile( - "movi v0.4s, #0 \n" - - "1: \n" - "ld1 {v1.4s,v2.4s}, [%0], #32 \n" // load 8 float weights - "ld1 {v3.4s,v4.4s}, [%1], #32 \n" // load 8 float values - "subs %w4, %w4, #8 \n" // 8 pixels per loop - - "fdiv v1.4s, v3.4s, v1.4s \n" // values / weights - "fdiv v2.4s, v4.4s, v2.4s \n" - - "fcvtas v1.4s, v1.4s \n" // float to int - "fcvtas v2.4s, v2.4s \n" // float to int - "uqxtn v1.4h, v1.4s \n" // 8 shorts - "uqxtn2 v1.8h, v2.4s \n" - "uqxtn v1.8b, v1.8h \n" // 8 bytes - - "st1 {v1.8b}, [%2], #8 \n" // store 8 byte out - - "fcmgt v5.4s, v1.4s, v0.4s \n" // cmp weight to zero - "fcmgt v6.4s, v2.4s, v0.4s \n" - "uqxtn v5.4h, v5.4s \n" // 8 shorts - "uqxtn2 v5.8h, v6.4s \n" - "uqxtn v5.8b, v1.8h \n" // 8 bytes - - "st1 {v5.8b}, [%3], #8 \n" // store 8 byte mask - - "b.gt 1b \n" - : "+r"(src_weights), // %0 - "+r"(src_values), // %1 - "+r"(dst_out), // %2 - "+r"(dst_mask), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6"); -} - // Convert biplanar UV channel of NV12 to NV21 -void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) { +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) { asm volatile( - "1: \n" + "1: \n" "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 UV values "orr v2.16b, v0.16b, v0.16b \n" // move U after V "subs %w2, %w2, #16 \n" // 16 pixels per loop diff --git a/source/row_win.cc b/source/row_win.cc index 27e3da7ba..2214d272e 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -5450,7 +5450,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row, // 1 pixel loop l1: - movd xmm2, dword ptr [eax] // 1 argb pixel, 4 bytes. + movd xmm2, dword ptr [eax] // 1 argb pixel lea eax, [eax + 4] punpcklbw xmm2, xmm1 punpcklwd xmm2, xmm1 diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 32a4cd1ca..31173779e 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -993,8 +993,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) -// TODO(fbarchard): Investigate J420 error of 11 on Windows. 
-TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 11) +TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 4) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) diff --git a/unit_test/planar_test.cc index 70f8966e0..22e48abb2 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -3269,88 +3269,7 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { EXPECT_EQ(dst_pixels_c[639], static_cast(30704)); } -float TestFloatDivToByte(int benchmark_width, - int benchmark_height, - int benchmark_iterations, - float scale, - bool opt) { - int i, j; - // NEON does multiple of 8, so round count up - const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; - align_buffer_page_end(src_weights, kPixels * 4); - align_buffer_page_end(src_values, kPixels * 4); - align_buffer_page_end(dst_out_c, kPixels); - align_buffer_page_end(dst_out_opt, kPixels); - align_buffer_page_end(dst_mask_c, kPixels); - align_buffer_page_end(dst_mask_opt, kPixels); - - // Randomize works but may contain some denormals affecting performance. - // MemRandomize(orig_y, kPixels * 4); - // large values are problematic. audio is really -1 to 1. - for (i = 0; i < kPixels; ++i) { - (reinterpret_cast<float*>(src_weights))[i] = scale; - (reinterpret_cast<float*>(src_values))[i] = - sinf(static_cast<float>(i) * 0.1f); - } - memset(dst_out_c, 0, kPixels); - memset(dst_out_opt, 1, kPixels); - memset(dst_mask_c, 2, kPixels); - memset(dst_mask_opt, 3, kPixels); - - FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_c, - dst_mask_c, kPixels); - - for (j = 0; j < benchmark_iterations; j++) { - if (opt) { -#ifdef HAS_FLOATDIVTOBYTEROW_NEON - FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_opt, - dst_mask_opt, kPixels); -#else - FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_opt, - dst_mask_opt, kPixels); -#endif - } else { - FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), - reinterpret_cast<float*>(src_values), dst_out_opt, - dst_mask_opt, kPixels); - } - } - - uint8_t max_diff = 0; - for (i = 0; i < kPixels; ++i) { - uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) + - abs(dst_mask_c[i] - dst_mask_opt[i]); - if (abs_diff > max_diff) { - max_diff = abs_diff; - } - } - - free_aligned_buffer_page_end(src_weights); - free_aligned_buffer_page_end(src_values); - free_aligned_buffer_page_end(dst_out_c); - free_aligned_buffer_page_end(dst_out_opt); - free_aligned_buffer_page_end(dst_mask_c); - free_aligned_buffer_page_end(dst_mask_opt); - - return max_diff; -} - -TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) { - float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, - benchmark_iterations_, 1.2f, false); - EXPECT_EQ(0, diff); -} - -TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) { - float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, - benchmark_iterations_, 1.2f, true); - EXPECT_EQ(0, diff); -} - -TEST_F(LibYUVPlanarTest, UVToVURow) { +TEST_F(LibYUVPlanarTest, SwapUVRow) { const int kPixels = benchmark_width_ * benchmark_height_; align_buffer_page_end(src_pixels_vu, kPixels * 2); align_buffer_page_end(dst_pixels_uv, kPixels * 2); @@ -3358,7 +3277,7 @@ TEST_F(LibYUVPlanarTest, UVToVURow) { MemRandomize(src_pixels_vu, kPixels * 2); memset(dst_pixels_uv, 1, kPixels * 2); - UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels); + SwapUVRow_C(src_pixels_vu, dst_pixels_uv, kPixels); for (int i = 0; i < kPixels; ++i) { 
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);