From bb17da97cfa31c69800d0e3182b183d6c67dc763 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 16 Aug 2017 12:11:16 -0700 Subject: [PATCH] Test C vs NEON for ScaleDown2Box_16 TBR=kjellander@chromium.org BUG=libyuv:718 TEST=LibYUVScaleTest.TestScaleRowDown2Box_16 Change-Id: Ic74d29d6f14983ff26e8af541ef702a0f8bf3f17 Reviewed-on: https://chromium-review.googlesource.com/616189 Reviewed-by: Cheng Wang --- source/scale_neon64.cc | 5 ++--- unit_test/scale_test.cc | 47 +++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index 077def307..e1da7edb6 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -1002,7 +1002,7 @@ void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, int dst_width) { asm volatile( // change the stride to row 2 pointer - "add %1, %1, %0 \n" + "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2 "1: \n" "ld1 {v0.8h, v1.8h}, [%0], #32 \n" // load row 1 and post inc "ld1 {v2.8h, v3.8h}, [%1], #32 \n" // load row 2 and post inc @@ -1033,8 +1033,7 @@ void ScaleRowUp2_16_NEON(const uint16* src_ptr, uint16* dst, int dst_width) { asm volatile( - // change the stride to row 2 pointer - "add %1, %1, %0 \n" + "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2 "movi v20.4h, #1 \n" "movi v21.4h, #3 \n" // constants "movi v22.4h, #9 \n" diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index ffd05a7de..c74b6b85a 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -457,7 +457,7 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16* src_ptr, ptrdiff_t src_stride, uint16* dst, int dst_width); - + TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun SIMD_ALIGNED(uint16 dst_pixels_opt[1280]); @@ -471,13 +471,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { orig_pixels[i] = i; } ScaleRowUp2_16_NEON(&orig_pixels[0], - 640 * 2, + 640, &dst_pixels_c[0], 1280); for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ScaleRowUp2_16_NEON(&orig_pixels[0], - 640 * 2, + 640, &dst_pixels_opt[0], 1280); } @@ -488,42 +488,57 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16); EXPECT_EQ(dst_pixels_c[1279], 800); } +#endif extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, ptrdiff_t src_stride, uint16* dst, int dst_width); - + TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]); - SIMD_ALIGNED(uint16 dst_pixels_opt[1280]); SIMD_ALIGNED(uint16 dst_pixels_c[1280]); + SIMD_ALIGNED(uint16 dst_pixels_opt[1280]); memset(orig_pixels, 0, sizeof(orig_pixels)); - memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); - memset(dst_pixels_c, 2, sizeof(dst_pixels_c)); + memset(dst_pixels_c, 1, sizeof(dst_pixels_c)); + memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt)); for (int i = 0; i < 2560 * 2; ++i) { orig_pixels[i] = i; } - ScaleRowDown2Box_16_NEON(&orig_pixels[0], - 2560 * 2, + ScaleRowDown2Box_16_C(&orig_pixels[0], + 2560, &dst_pixels_c[0], 1280); - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { - ScaleRowDown2Box_16_NEON(&orig_pixels[0], - 2560 * 2, - &dst_pixels_opt[0], - 1280); +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) + int has_neon = TestCpuFlag(kCpuHasNEON); + if (has_neon) { + ScaleRowDown2Box_16_NEON(&orig_pixels[0], + 2560, + &dst_pixels_opt[0], + 1280); + } else { + ScaleRowDown2Box_16_C(&orig_pixels[0], + 2560, + &dst_pixels_opt[0], + 1280); + } +#else + ScaleRowDown2Box_16_C(&orig_pixels[0], + 2560, + &dst_pixels_opt[0], + 1280); +#endif } for (int i = 0; i < 1280; ++i) { EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } - EXPECT_EQ(dst_pixels_c[0], 1281); + + EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); EXPECT_EQ(dst_pixels_c[1279], 3839); } -#endif // __aarch64__ } // namespace libyuv