From ccd6d6fc578ae2bcb86d5a2db309ba87806fcb21 Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Tue, 26 Sep 2017 18:58:29 -0700
Subject: [PATCH] add TestCopySamples_Opt unittest as reference for TestScaleSamples_Opt

TestScaleSamples_Opt can be slow on ARM if the size of the buffer is 1 MB.
This test does a memcpy and behaves the same.

Bug: libyuv:738
Test: LibYUVPlanarTest.TestCopySamples_Opt
Change-Id: Ia9f30190ed76ea350ebe054c9b899d5268e7e135
Reviewed-on: https://chromium-review.googlesource.com/685751
Reviewed-by: Cheng Wang
Commit-Queue: Frank Barchard
---
 unit_test/planar_test.cc | 66 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 7a13fd8b2..6ef7059f4 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -2626,9 +2626,9 @@ float TestScaleMaxSamples(int benchmark_width,
   float max_c, max_opt = 0.f;
   // NEON does multiple of 8, so round count up
   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
-  align_buffer_page_end(orig_y, kPixels * 4 * 3);
-  uint8* dst_c = orig_y + kPixels * 4;
-  uint8* dst_opt = orig_y + kPixels * 4 * 2;
+  align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
+  uint8* dst_c = orig_y + kPixels * 4 + 16;
+  uint8* dst_opt = orig_y + kPixels * 4 * 2 + 32;
 
   // Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, kPixels * 4); @@ -2828,6 +2828,66 @@ TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) { EXPECT_EQ(0, diff); } +float TestCopySamples(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + bool opt) { + int i, j; + // NEON does multiple of 16 floats, so round count up + const int kPixels = (benchmark_width * benchmark_height + 15) & ~15; + align_buffer_page_end(orig_y, kPixels * 4 * 3); + uint8* dst_c = orig_y + kPixels * 4; + uint8* dst_opt = orig_y + kPixels * 4 * 2; + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. + for (i = 0; i < kPixels; ++i) { + (reinterpret_cast(orig_y))[i] = sinf(static_cast(i) * 0.1f); + } + memset(dst_c, 0, kPixels * 4); + memset(dst_opt, 1, kPixels * 4); + + memcpy(reinterpret_cast(dst_c), reinterpret_cast(orig_y), + kPixels * 4); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_COPYROW_NEON + CopyRow_NEON(orig_y, dst_opt, kPixels * 4); +#else + CopyRow_C(orig_y, dst_opt, kPixels * 4); +#endif + } else { + CopyRow_C(orig_y, dst_opt, kPixels * 4); + } + } + + float max_diff = 0.f; + for (i = 0; i < kPixels; ++i) { + float abs_diff = FAbs((reinterpret_cast(dst_c)[i]) - + (reinterpret_cast(dst_opt)[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(orig_y); + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestCopySamples_C) { + float diff = TestCopySamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) { + float diff = TestCopySamples(benchmark_width_, benchmark_height_, + benchmark_iterations_, true); + EXPECT_EQ(0, diff); +} + extern "C" void GaussRow_NEON(const uint32* src, uint16* dst, int width); extern "C" void GaussRow_C(const uint32* src, uint16* dst, int width);