From f1c00932dfbc6bec9a48da90ca36ebd0101f16ce Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Wed, 17 Jul 2019 16:05:26 -0700
Subject: [PATCH] NV21 unittest and benchmark

BUG=libyuv:809

Change-Id: I75afb5612dcd05820479848a90ad16b07a7981bc
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1707229
Reviewed-by: richard winterton
Commit-Queue: Frank Barchard
---
 README.chromium                   |  2 +-
 docs/rotation.md                  |  4 ++
 include/libyuv/planar_functions.h |  9 ++++
 include/libyuv/version.h          |  2 +-
 source/planar_functions.cc        | 76 ++++++++++++++++-------------
 unit_test/convert_test.cc         | 79 ++++++++++++++++---------------
 unit_test/planar_test.cc          | 77 +++++++++++++++++++++++++-----
 7 files changed, 162 insertions(+), 87 deletions(-)

diff --git a/README.chromium b/README.chromium
index bddc20238..41eae983b 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1732
+Version: 1733
 License: BSD
 License File: LICENSE
 
diff --git a/docs/rotation.md b/docs/rotation.md
index fb84fce5a..a08430fde 100644
--- a/docs/rotation.md
+++ b/docs/rotation.md
@@ -100,4 +100,8 @@ Inverting can be achieved with almost any libyuv function by passing a negative
 
 I420Mirror and ARGBMirror can also be used to rotate by 180 degrees by passing a
 negative height.
 
+# Cropping - Vertical Flip
+When cropping from a subsampled format like NV21, setting the start pointers won't work for an odd crop start y on the UV plane.
+If the height after cropping will be odd, invert the source: point to the last row, negate the strides, and pass a negative height.
+The conversion will re-invert the image as it writes the output.
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index f6f5b3edd..5299fe2c0 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -105,6 +105,15 @@ void MergeUVPlane(const uint8_t* src_u,
                   int width,
                   int height);
 
+// Swap U and V channels in interleaved UV plane.
+LIBYUV_API
+void SwapUVPlane(const uint8_t* src_uv,
+                 int src_stride_uv,
+                 uint8_t* dst_vu,
+                 int dst_stride_vu,
+                 int width,
+                 int height);
+
 // Split interleaved RGB plane into separate R, G and B planes.
 LIBYUV_API
 void SplitRGBPlane(const uint8_t* src_rgb,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 741ef34df..0f245bf9a 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1732
+#define LIBYUV_VERSION 1733
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 59e687afd..d739baa14 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -503,6 +503,46 @@ void MergeUVPlane(const uint8_t* src_u,
   }
 }
 
+// Swap U and V channels in interleaved UV plane.
+LIBYUV_API
+void SwapUVPlane(const uint8_t* src_uv,
+                 int src_stride_uv,
+                 uint8_t* dst_vu,
+                 int dst_stride_vu,
+                 int width,
+                 int height) {
+  int y;
+  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+      SwapUVRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_uv = src_uv + (height - 1) * src_stride_uv;
+    src_stride_uv = -src_stride_uv;
+  }
+  // Coalesce rows.
+  if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_uv = dst_stride_vu = 0;
+  }
+
+#if defined(HAS_SWAPUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SwapUVRow = SwapUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      SwapUVRow = SwapUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    SwapUVRow(src_uv, dst_vu, width);
+    src_uv += src_stride_uv;
+    dst_vu += dst_stride_vu;
+  }
+}
+
 // Convert NV21 to NV12.
 LIBYUV_API
 int NV21ToNV12(const uint8_t* src_y,
@@ -515,48 +555,16 @@ int NV21ToNV12(const uint8_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  int y;
-  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
-      SwapUVRow_C;
-
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
   if (!src_vu || !dst_uv || width <= 0 || height == 0) {
     return -1;
   }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_vu = src_vu + (halfheight - 1) * src_stride_vu;
-    src_stride_y = -src_stride_y;
-    src_stride_vu = -src_stride_vu;
-  }
-  // Coalesce rows.
-  if (src_stride_vu == halfwidth * 2 && dst_stride_uv == halfwidth * 2) {
-    halfwidth *= halfheight;
-    halfheight = 1;
-    src_stride_vu = dst_stride_uv = 0;
-  }
-
-#if defined(HAS_SWAPUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SwapUVRow = SwapUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SwapUVRow = SwapUVRow_NEON;
-    }
-  }
-#endif
 
   if (dst_y) {
     CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   }
-
-  for (y = 0; y < halfheight; ++y) {
-    SwapUVRow(src_vu, dst_uv, halfwidth);
-    src_vu += src_stride_vu;
-    dst_uv += dst_stride_uv;
-  }
+  SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
+              halfheight);
   return 0;
 }
 
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 31173779e..c564ced6a 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -397,67 +397,68 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
 
 #define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,      \
                           FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG,   \
-                          OFF)                                               \
+                          OFF, DOY)                                          \
   TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {             \
    const int kWidth = ((W1280) > 0) ? (W1280) : 1;                          \
    const int kHeight = benchmark_height_;                                   \
    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                     \
-   align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 *     \
+   align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) *     \
                                      SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) +    \
                                      OFF);                                  \
    align_buffer_page_end(dst_y_c, kWidth* kHeight);                         \
-   align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 *       \
+   align_buffer_page_end(dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) *       \
                                        SUBSAMPLE(kHeight, SUBSAMP_Y));      \
    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                       \
-   align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 *     \
+   align_buffer_page_end(dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) *     \
                                          SUBSAMPLE(kHeight, SUBSAMP_Y));    \
    for (int i = 0; i < kHeight; ++i)                                        \
      for (int j = 0; j < kWidth; ++j)                                       \
        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff);                   \
    for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) {            \
-     for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) {           \
-       src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] =   \
-           (fastrand() & 0xff);                                             \
-       src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] =   \
+     for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) {       \
+       src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] =       \
           (fastrand() & 0xff);                                              \
      }                                                                      \
    }                                                                        \
    memset(dst_y_c, 1, kWidth* kHeight);                                     \
    memset(dst_uv_c, 2,                                                      \
-          SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+          2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
    memset(dst_y_opt, 101, kWidth* kHeight);                                 \
    memset(dst_uv_opt, 102,                                                  \
-          SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+          2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
    MaskCpuFlags(disable_cpu_flags_);                                        \
    SRC_FMT_PLANAR##To##FMT_PLANAR(                                          \
        src_y + OFF, kWidth, src_uv + OFF,                                   \
-       SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c,     \
-       SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight);              \
+       2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_c : NULL, kWidth,  \
+       dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight);    \
    MaskCpuFlags(benchmark_cpu_info_);                                       \
    for (int i = 0; i < benchmark_iterations_; ++i) {                        \
      SRC_FMT_PLANAR##To##FMT_PLANAR(                                        \
          src_y + OFF, kWidth, src_uv + OFF,                                 \
-         SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \
-         SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight);            \
+         2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_opt : NULL,      \
+         kWidth, dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth,      \
+         NEG kHeight);                                                      \
    }                                                                        \
    int max_diff = 0;                                                        \
-   for (int i = 0; i < kHeight; ++i) {                                      \
-     for (int j = 0; j < kWidth; ++j) {                                     \
-       int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) -       \
-                          static_cast<int>(dst_y_opt[i * kWidth + j]));     \
-       if (abs_diff > max_diff) {                                           \
-         max_diff = abs_diff;                                               \
+   if (DOY) {                                                               \
+     for (int i = 0; i < kHeight; ++i) {                                    \
+       for (int j = 0; j < kWidth; ++j) {                                   \
+         int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) -     \
+                            static_cast<int>(dst_y_opt[i * kWidth + j]));   \
+         if (abs_diff > max_diff) {                                         \
+           max_diff = abs_diff;                                             \
+         }                                                                  \
        }                                                                    \
      }                                                                      \
+     EXPECT_LE(max_diff, 1);                                                \
    }                                                                        \
-   EXPECT_LE(max_diff, 1);                                                  \
    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                \
-     for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) {           \
+     for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SUBSAMP_X); ++j) {           \
        int abs_diff =                                                       \
            abs(static_cast<int>(                                            \
-                   dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) -    \
+                   dst_uv_c[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) -    \
                static_cast<int>(                                            \
-                   dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]));  \
+                   dst_uv_opt[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j]));  \
        if (abs_diff > max_diff) {                                           \
          max_diff = abs_diff;                                               \
        }                                                                    \
@@ -472,21 +473,21 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
     free_aligned_buffer_page_end(src_uv);                                   \
   }
 
-#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,       \
-                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y)                   \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
-                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)      \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
-                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0)  \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
-                    SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1) \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
-                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0)   \
-  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
-                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
+#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,       \
+                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y)                   \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, 1) \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
+                    1)                                                       \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1) \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1)   \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0)
 
-// TODO(fbarchard): Fix msan on this unittest
-// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
+TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
 
 #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,       \
                          FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
@@ -993,7 +994,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
 TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
 TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
 TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
-TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 4)
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 10)
 TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
 TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
 TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 22e48abb2..cb9994a49 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 
 // row.h defines SIMD_ALIGNED, overriding unit_test.h
+// TODO(fbarchard): Remove row.h from unittests. Test public functions.
 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
 
 #include "../unit_test/unit_test.h"
@@ -2321,7 +2322,8 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
 }
 
 TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels, kPixels * 4);
   align_buffer_page_end(dst_pixels_opt, kPixels);
   align_buffer_page_end(dst_pixels_c, kPixels);
@@ -2349,7 +2351,8 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
 }
 
 TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(orig_pixels, kPixels);
   align_buffer_page_end(dst_pixels_opt, kPixels * 4);
   align_buffer_page_end(dst_pixels_c, kPixels * 4);
@@ -2482,7 +2485,8 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
 }
 
 TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels, kPixels * 2);
   align_buffer_page_end(tmp_pixels_u, kPixels);
   align_buffer_page_end(tmp_pixels_v, kPixels);
@@ -2526,7 +2530,8 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
 }
 
 TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels, kPixels * 2);
   align_buffer_page_end(tmp_pixels_u, kPixels);
   align_buffer_page_end(tmp_pixels_v, kPixels);
@@ -2568,8 +2573,39 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
   free_aligned_buffer_page_end(dst_pixels_c);
 }
 
+TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+  align_buffer_page_end(src_pixels, kPixels * 2);
+  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
+  align_buffer_page_end(dst_pixels_c, kPixels * 2);
+
+  MemRandomize(src_pixels, kPixels * 2);
+  MemRandomize(dst_pixels_opt, kPixels * 2);
+  MemRandomize(dst_pixels_c, kPixels * 2);
+
+  MaskCpuFlags(disable_cpu_flags_);
+  SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
+              benchmark_width_ * 2, benchmark_width_, benchmark_height_);
+  MaskCpuFlags(benchmark_cpu_info_);
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
+                benchmark_width_ * 2, benchmark_width_, benchmark_height_);
+  }
+
+  for (int i = 0; i < kPixels * 2; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
+
+  free_aligned_buffer_page_end(src_pixels);
+  free_aligned_buffer_page_end(dst_pixels_opt);
+  free_aligned_buffer_page_end(dst_pixels_c);
+}
+
 TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels, kPixels * 3);
   align_buffer_page_end(tmp_pixels_r, kPixels);
   align_buffer_page_end(tmp_pixels_g, kPixels);
@@ -2617,7 +2653,8 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
 }
 
 TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels, kPixels * 3);
   align_buffer_page_end(tmp_pixels_r, kPixels);
   align_buffer_page_end(tmp_pixels_g, kPixels);
@@ -2666,7 +2703,8 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
 // TODO(fbarchard): improve test for platforms and cpu detect
 #ifdef HAS_MERGEUVROW_16_AVX2
 TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels_u, kPixels * 2);
   align_buffer_page_end(src_pixels_v, kPixels * 2);
   align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
@@ -2710,7 +2748,8 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
 // TODO(fbarchard): Improve test for more platforms.
 #ifdef HAS_MULTIPLYROW_16_AVX2
 TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels_y, kPixels * 2);
   align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
   align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
@@ -2746,7 +2785,8 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
 #endif  // HAS_MULTIPLYROW_16_AVX2
 
 TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels_y, kPixels * 2);
   align_buffer_page_end(dst_pixels_y_opt, kPixels);
   align_buffer_page_end(dst_pixels_y_c, kPixels);
@@ -2823,7 +2863,8 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
 #endif  // HAS_CONVERT16TO8ROW_AVX2
 
 TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
-  const int kPixels = benchmark_width_ * benchmark_height_;
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
   align_buffer_page_end(src_pixels_y, kPixels);
   align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
   align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
@@ -3271,14 +3312,26 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
 
 TEST_F(LibYUVPlanarTest, SwapUVRow) {
   const int kPixels = benchmark_width_ * benchmark_height_;
+  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+      SwapUVRow_C;
+
   align_buffer_page_end(src_pixels_vu, kPixels * 2);
   align_buffer_page_end(dst_pixels_uv, kPixels * 2);
-
   MemRandomize(src_pixels_vu, kPixels * 2);
   memset(dst_pixels_uv, 1, kPixels * 2);
 
-  SwapUVRow_C(src_pixels_vu, dst_pixels_uv, kPixels);
+#if defined(HAS_SWAPUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SwapUVRow = SwapUVRow_Any_NEON;
+    if (IS_ALIGNED(kPixels, 16)) {
+      SwapUVRow = SwapUVRow_NEON;
+    }
+  }
+#endif
+  for (int j = 0; j < benchmark_iterations_; j++) {
+    SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
+  }
 
   for (int i = 0; i < kPixels; ++i) {
     EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
     EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
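
A minimal sketch (not part of the patch) of the cropping recipe documented in the docs/rotation.md hunk above, applied to the NV21ToNV12() that this change refactors. The crop rectangle, buffer names, and the ConvertNV21CropOddY helper are assumptions for illustration, not libyuv API:

#include <stdint.h>

#include "libyuv/planar_functions.h"  // NV21ToNV12, refactored in this patch

// Convert a crop of an NV21 frame whose crop start y is odd: point both
// planes at the last row of the crop, negate the strides, and pass a
// negative height. The conversion re-inverts internally, so the output
// comes out right side up.
int ConvertNV21CropOddY(const uint8_t* src_y, int src_stride_y,
                        const uint8_t* src_vu, int src_stride_vu,
                        uint8_t* dst_y, int dst_stride_y,
                        uint8_t* dst_uv, int dst_stride_uv,
                        int crop_x,  // must be even for NV21
                        int crop_y,  // may be odd
                        int crop_width, int crop_height) {
  // Last luma row of the crop.
  const uint8_t* last_y =
      src_y + (crop_y + crop_height - 1) * src_stride_y + crop_x;
  // VU row paired with that last luma row. The VU plane is half height and
  // holds 2 bytes per 2x2 luma block, so the x byte offset equals crop_x.
  const uint8_t* last_vu =
      src_vu + ((crop_y + crop_height - 1) >> 1) * src_stride_vu + crop_x;
  // Negated strides plus negative height: the conversion walks the source
  // bottom-up and writes the destination top-down.
  return libyuv::NV21ToNV12(last_y, -src_stride_y, last_vu, -src_stride_vu,
                            dst_y, dst_stride_y, dst_uv, dst_stride_uv,
                            crop_width, -crop_height);
}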
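
For reference, a hedged usage sketch of the SwapUVPlane() this patch exports. The 640x480 frame size and buffer names are assumptions; the argument convention (width counted in UV pairs, strides in bytes) matches the SwapUVPlane_Opt test above:

#include <stdint.h>
#include <vector>

#include "libyuv/planar_functions.h"  // SwapUVPlane, added in this patch

int main() {
  const int width = 640;   // luma width (assumed)
  const int height = 480;  // luma height (assumed)
  const int half_width = (width + 1) / 2;
  const int half_height = (height + 1) / 2;
  // NV21 stores chroma as interleaved VU, 2 bytes per 2x2 luma block.
  std::vector<uint8_t> vu(2 * half_width * half_height);
  std::vector<uint8_t> uv(2 * half_width * half_height);
  // Reorder VU to the UV order NV12 expects: width is in pairs, and each
  // chroma row is half_width pairs = half_width * 2 bytes.
  libyuv::SwapUVPlane(vu.data(), half_width * 2, uv.data(), half_width * 2,
                      half_width, half_height);
  return 0;
}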