NV12 Bilinear upsampling bug fix

Reenable InterpolateRow_AVX2

Bug: libyuv:838, b/68638384, b/176195584
Change-Id: I990fcc204d89ee9b8f5264184558a08aa21d6a9f
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2626067
Reviewed-by: Eugene Zemtsov <eugene@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2021-01-12 14:15:24 -08:00 committed by Frank Barchard
parent 1d3f901aa0
commit 93b1b332cd
6 changed files with 111 additions and 48 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1768
Version: 1770
License: BSD
License File: LICENSE

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1768
#define LIBYUV_VERSION 1770
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -492,7 +492,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
@ -500,7 +500,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
@ -508,7 +508,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_NEON;
}
}
@ -516,7 +516,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_MMI;
}
}
@ -524,7 +524,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
if (IS_ALIGNED(dst_width, 8)) {
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_MSA;
}
}
@ -540,7 +540,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
if (IS_ALIGNED(dst_width, 8)) {
ScaleUVFilterCols = ScaleUVFilterCols_NEON;
}
}
@ -548,7 +548,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
if (filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
if (IS_ALIGNED(dst_width, 8)) {
if (IS_ALIGNED(dst_width, 16)) {
ScaleUVFilterCols = ScaleUVFilterCols_MSA;
}
}
@ -561,7 +561,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_SCALEUVCOLS_NEON)
if (!filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleUVFilterCols = ScaleUVCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
if (IS_ALIGNED(dst_width, 16)) {
ScaleUVFilterCols = ScaleUVCols_NEON;
}
}
@ -577,7 +577,7 @@ static void ScaleUVBilinearUp(int src_width,
#if defined(HAS_SCALEUVCOLS_MSA)
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleUVFilterCols = ScaleUVCols_Any_MSA;
if (IS_ALIGNED(dst_width, 4)) {
if (IS_ALIGNED(dst_width, 8)) {
ScaleUVFilterCols = ScaleUVCols_MSA;
}
}
@ -844,7 +844,6 @@ static void ScaleUV(const uint8_t* src,
dst_stride, src, dst, x, y, dy, 4, filtering);
return;
}
#if HAS_SCALEUVBILINEARUP
if (filtering && dy < 65536) {
ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,

View File

@ -312,6 +312,21 @@ TEST_SCALETO(ARGBScale, 1920, 1080)
#undef TEST_SCALETO1
#undef TEST_SCALETO
// Defines TEST_F(LibYUVScaleTest, <name>SwapXY_<filter>).
// The test calls ARGBTestFilter with benchmark_width_/benchmark_height_
// followed by the same two values in swapped order (presumably source size
// then destination size - confirm against ARGBTestFilter), using
// kFilter<filter>, and expects the returned diff to be <= max_diff.
#define TEST_SCALESWAPXY1(name, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \
int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
// Test scale with swapped width and height with all 3 filters.
TEST_SCALESWAPXY1(ARGBScale, None, 0)
TEST_SCALESWAPXY1(ARGBScale, Linear, 0)
TEST_SCALESWAPXY1(ARGBScale, Bilinear, 0)
#undef TEST_SCALESWAPXY1
// Scale with YUV conversion to ARGB and clipping.
// TODO(fbarchard): Add fourcc support. All 4 ARGB formats are easy to support.
LIBYUV_API

View File

@ -771,6 +771,58 @@ TEST_SCALETO(Scale, 1920, 1080)
#undef TEST_SCALETO1
#undef TEST_SCALETO
// Defines five TEST_F(LibYUVScaleTest, ...) cases per (name, filter) pair:
//   I420<name>SwapXY_<filter>, I444<name>SwapXY_<filter>,
//   NV12<name>SwapXY_<filter>, and the _16 variants of I420 and I444.
// Each case calls the matching *TestFilter helper with
// benchmark_width_/benchmark_height_ followed by the same two values in
// swapped order (presumably source size then destination size - confirm
// against the helpers), using kFilter<filter>, and expects the returned diff
// to be <= max_diff.
// The first macro argument (DISABLED_) is token-pasted only onto the names of
// the two _16 cases; pass it empty to leave those names unprefixed. The
// I420, I444 and NV12 case names never receive the prefix.
#define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff) \
TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) { \
int diff = I420TestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \
int diff = I444TestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \
int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \
int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \
int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
// Test scale with swapped width and height with all 4 filters.
#ifdef ENABLE_SLOW_TESTS
TEST_SCALESWAPXY1(, Scale, None, 0)
TEST_SCALESWAPXY1(, Scale, Linear, 3)
TEST_SCALESWAPXY1(, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(, Scale, Box, 3)
#else
TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0)
TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
#endif
#undef TEST_SCALESWAPXY1
#ifdef ENABLE_ROW_TESTS
#ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
@ -1119,22 +1171,16 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
align_buffer_page_end(dest_opt_pixels, kSize);
align_buffer_page_end(dest_c_pixels, kSize);
MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
ScalePlane(orig_pixels, benchmark_width_,
benchmark_width_, benchmark_height_,
dest_c_pixels, benchmark_height_,
benchmark_height_, benchmark_width_,
kFilterNone);
ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
dest_c_pixels, benchmark_height_, benchmark_height_,
benchmark_width_, kFilterNone);
MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations_; ++i) {
ScalePlane(orig_pixels, benchmark_width_,
benchmark_width_, benchmark_height_,
dest_opt_pixels, benchmark_height_,
benchmark_height_, benchmark_width_,
kFilterNone);
ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
benchmark_height_, dest_opt_pixels, benchmark_height_,
benchmark_height_, benchmark_width_, kFilterNone);
}
for (int i = 0; i < kSize; ++i) {
@ -1155,22 +1201,16 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
align_buffer_page_end(dest_opt_pixels, kSize);
align_buffer_page_end(dest_c_pixels, kSize);
MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
ScalePlane(orig_pixels, benchmark_width_,
benchmark_width_, benchmark_height_,
dest_c_pixels, benchmark_height_,
benchmark_height_, benchmark_width_,
kFilterBilinear);
ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
dest_c_pixels, benchmark_height_, benchmark_height_,
benchmark_width_, kFilterBilinear);
MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations_; ++i) {
ScalePlane(orig_pixels, benchmark_width_,
benchmark_width_, benchmark_height_,
dest_opt_pixels, benchmark_height_,
benchmark_height_, benchmark_width_,
kFilterBilinear);
ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
benchmark_height_, dest_opt_pixels, benchmark_height_,
benchmark_height_, benchmark_width_, kFilterBilinear);
}
for (int i = 0; i < kSize; ++i) {
@ -1192,22 +1232,16 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
align_buffer_page_end(dest_opt_pixels, kSize);
align_buffer_page_end(dest_c_pixels, kSize);
MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
ScalePlane(orig_pixels, benchmark_width_,
benchmark_width_, benchmark_height_,
dest_c_pixels, benchmark_height_,
benchmark_height_, benchmark_width_,
kFilterBox);
ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
dest_c_pixels, benchmark_height_, benchmark_height_,
benchmark_width_, kFilterBox);
MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations_; ++i) {
ScalePlane(orig_pixels, benchmark_width_,
benchmark_width_, benchmark_height_,
dest_opt_pixels, benchmark_height_,
benchmark_height_, benchmark_width_,
kFilterBox);
ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
benchmark_height_, dest_opt_pixels, benchmark_height_,
benchmark_height_, benchmark_width_, kFilterBox);
}
for (int i = 0; i < kSize; ++i) {

View File

@ -176,6 +176,21 @@ TEST_SCALETO(UVScale, 1920, 1080)
#undef TEST_SCALETO1
#undef TEST_SCALETO
// Defines TEST_F(LibYUVScaleTest, <name>SwapXY_<filter>).
// The test calls UVTestFilter with benchmark_width_/benchmark_height_
// followed by the same two values in swapped order (presumably source size
// then destination size - confirm against UVTestFilter), using
// kFilter<filter>, and expects the returned diff to be <= max_diff.
#define TEST_SCALESWAPXY1(name, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##SwapXY_##filter) { \
int diff = \
UVTestFilter(benchmark_width_, benchmark_height_, benchmark_height_, \
benchmark_width_, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
// Test scale with swapped width and height with all 3 filters.
TEST_SCALESWAPXY1(UVScale, None, 0)
TEST_SCALESWAPXY1(UVScale, Linear, 0)
TEST_SCALESWAPXY1(UVScale, Bilinear, 0)
#undef TEST_SCALESWAPXY1
TEST_F(LibYUVScaleTest, UVTest3x) {
const int kSrcStride = 48 * 2;
const int kDstStride = 16 * 2;