From 93b1b332cd60b56ab90aea14182755e379c28a80 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 12 Jan 2021 14:15:24 -0800 Subject: [PATCH] NV12 Bilinear upsampling bug fix Reenable InterpolateRow_AVX2 Bug: libyuv:838, b/68638384, b/176195584 Change-Id: I990fcc204d89ee9b8f5264184558a08aa21d6a9f Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2626067 Reviewed-by: Eugene Zemtsov Reviewed-by: richard winterton Reviewed-by: Frank Barchard --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/scale_uv.cc | 19 +++---- unit_test/scale_argb_test.cc | 15 +++++ unit_test/scale_test.cc | 106 +++++++++++++++++++++++------------ unit_test/scale_uv_test.cc | 15 +++++ 6 files changed, 111 insertions(+), 48 deletions(-) diff --git a/README.chromium b/README.chromium index 4a7e30b08..578228da4 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1768 +Version: 1770 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index efaac73e3..3c632b3ab 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1768 +#define LIBYUV_VERSION 1770 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/scale_uv.cc b/source/scale_uv.cc index b0469f09b..c57df5959 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -492,7 +492,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(dst_width, 4)) { + if (IS_ALIGNED(dst_width, 8)) { InterpolateRow = InterpolateRow_SSSE3; } } @@ -500,7 +500,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width, 8)) { + if 
(IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_AVX2; } } @@ -508,7 +508,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { + if (IS_ALIGNED(dst_width, 8)) { InterpolateRow = InterpolateRow_NEON; } } @@ -516,7 +516,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_INTERPOLATEROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { InterpolateRow = InterpolateRow_Any_MMI; - if (IS_ALIGNED(dst_width, 2)) { + if (IS_ALIGNED(dst_width, 4)) { InterpolateRow = InterpolateRow_MMI; } } @@ -524,7 +524,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { + if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_MSA; } } @@ -540,7 +540,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_SCALEUVFILTERCOLS_NEON) if (filtering && TestCpuFlag(kCpuHasNEON)) { ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { + if (IS_ALIGNED(dst_width, 8)) { ScaleUVFilterCols = ScaleUVFilterCols_NEON; } } @@ -548,7 +548,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_SCALEUVFILTERCOLS_MSA) if (filtering && TestCpuFlag(kCpuHasMSA)) { ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { + if (IS_ALIGNED(dst_width, 16)) { ScaleUVFilterCols = ScaleUVFilterCols_MSA; } } @@ -561,7 +561,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_SCALEUVCOLS_NEON) if (!filtering && TestCpuFlag(kCpuHasNEON)) { ScaleUVFilterCols = ScaleUVCols_Any_NEON; - if (IS_ALIGNED(dst_width, 8)) { + if (IS_ALIGNED(dst_width, 16)) { ScaleUVFilterCols = ScaleUVCols_NEON; } } @@ -577,7 +577,7 @@ static void ScaleUVBilinearUp(int src_width, #if defined(HAS_SCALEUVCOLS_MSA) if (!filtering && TestCpuFlag(kCpuHasMSA)) { 
ScaleUVFilterCols = ScaleUVCols_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { + if (IS_ALIGNED(dst_width, 8)) { ScaleUVFilterCols = ScaleUVCols_MSA; } } @@ -844,7 +844,6 @@ static void ScaleUV(const uint8_t* src, dst_stride, src, dst, x, y, dy, 4, filtering); return; } - #if HAS_SCALEUVBILINEARUP if (filtering && dy < 65536) { ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height, diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index 2fdf5f603..c04a236a1 100644 --- a/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc @@ -312,6 +312,21 @@ TEST_SCALETO(ARGBScale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO +#define TEST_SCALESWAPXY1(name, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \ + int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test scale with swapped width and height with all 3 filters. +TEST_SCALESWAPXY1(ARGBScale, None, 0) +TEST_SCALESWAPXY1(ARGBScale, Linear, 0) +TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0) +#undef TEST_SCALESWAPXY1 + // Scale with YUV conversion to ARGB and clipping. // TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support. 
LIBYUV_API diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index d627af02d..d5294110b 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -771,6 +771,58 @@ TEST_SCALETO(Scale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO +#define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff) \ + TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) { \ + int diff = I420TestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \ + int diff = I444TestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \ + int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \ + int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \ + int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \ + benchmark_height_, benchmark_width_, \ + kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test scale with swapped width and height with all 4 filters. 
+#ifdef ENABLE_SLOW_TESTS +TEST_SCALESWAPXY1(, Scale, None, 0) +TEST_SCALESWAPXY1(, Scale, Linear, 3) +TEST_SCALESWAPXY1(, Scale, Bilinear, 3) +TEST_SCALESWAPXY1(, Scale, Box, 3) +#else +TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0) +TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3) +TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3) +TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3) +#endif + +#undef TEST_SCALESWAPXY1 + #ifdef ENABLE_ROW_TESTS #ifdef HAS_SCALEROWDOWN2_SSSE3 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { @@ -1119,22 +1171,16 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { align_buffer_page_end(dest_opt_pixels, kSize); align_buffer_page_end(dest_c_pixels, kSize); - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, - benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, - kFilterNone); + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterNone); MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, - benchmark_width_, benchmark_height_, - dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, - kFilterNone); + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterNone); } for (int i = 0; i < kSize; ++i) { @@ -1155,22 +1201,16 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { align_buffer_page_end(dest_opt_pixels, kSize); align_buffer_page_end(dest_c_pixels, kSize); - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. 
- ScalePlane(orig_pixels, benchmark_width_, - benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, - kFilterBilinear); + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBilinear); MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. - for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, - benchmark_width_, benchmark_height_, - dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, - kFilterBilinear); + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBilinear); } for (int i = 0; i < kSize; ++i) { @@ -1192,22 +1232,16 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { align_buffer_page_end(dest_opt_pixels, kSize); align_buffer_page_end(dest_c_pixels, kSize); - MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization. - ScalePlane(orig_pixels, benchmark_width_, - benchmark_width_, benchmark_height_, - dest_c_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, - kFilterBox); + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_, + dest_c_pixels, benchmark_height_, benchmark_height_, + benchmark_width_, kFilterBox); MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization. 
- for (int i = 0; i < benchmark_iterations_; ++i) { - ScalePlane(orig_pixels, benchmark_width_, - benchmark_width_, benchmark_height_, - dest_opt_pixels, benchmark_height_, - benchmark_height_, benchmark_width_, - kFilterBox); + ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, + benchmark_height_, dest_opt_pixels, benchmark_height_, + benchmark_height_, benchmark_width_, kFilterBox); } for (int i = 0; i < kSize; ++i) { diff --git a/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc index b62bf3ad7..e45a25da4 100644 --- a/unit_test/scale_uv_test.cc +++ b/unit_test/scale_uv_test.cc @@ -176,6 +176,21 @@ TEST_SCALETO(UVScale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO +#define TEST_SCALESWAPXY1(name, filter, max_diff) \ + TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \ + int diff = \ + UVTestFilter(benchmark_width_, benchmark_height_, benchmark_height_, \ + benchmark_width_, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } + +// Test scale with swapped width and height with all 3 filters. +TEST_SCALESWAPXY1(UVScale, None, 0) +TEST_SCALESWAPXY1(UVScale, Linear, 0) +TEST_SCALESWAPXY1(UVScale, Bilinear, 0) +#undef TEST_SCALESWAPXY1 + TEST_F(LibYUVScaleTest, UVTest3x) { const int kSrcStride = 48 * 2; const int kDstStride = 16 * 2;