mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
Test C vs NEON for ScaleDown2Box_16
TBR=kjellander@chromium.org BUG=libyuv:718 TEST=LibYUVScaleTest.TestScaleRowDown2Box_16 Change-Id: Ic74d29d6f14983ff26e8af541ef702a0f8bf3f17 Reviewed-on: https://chromium-review.googlesource.com/616189 Reviewed-by: Cheng Wang <wangcheng@google.com>
This commit is contained in:
parent
7e59ee4c75
commit
bb17da97cf
@ -1002,7 +1002,7 @@ void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
|
|||||||
int dst_width) {
|
int dst_width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
// change the stride to row 2 pointer
|
// change the stride to row 2 pointer
|
||||||
"add %1, %1, %0 \n"
|
"add %1, %0, %1, lsl #1 \n" // ptr + stride * 2
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v0.8h, v1.8h}, [%0], #32 \n" // load row 1 and post inc
|
"ld1 {v0.8h, v1.8h}, [%0], #32 \n" // load row 1 and post inc
|
||||||
"ld1 {v2.8h, v3.8h}, [%1], #32 \n" // load row 2 and post inc
|
"ld1 {v2.8h, v3.8h}, [%1], #32 \n" // load row 2 and post inc
|
||||||
@ -1033,8 +1033,7 @@ void ScaleRowUp2_16_NEON(const uint16* src_ptr,
|
|||||||
uint16* dst,
|
uint16* dst,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
// change the stride to row 2 pointer
|
"add %1, %0, %1, lsl #1 \n" // ptr + stride * 2
|
||||||
"add %1, %1, %0 \n"
|
|
||||||
"movi v20.4h, #1 \n"
|
"movi v20.4h, #1 \n"
|
||||||
"movi v21.4h, #3 \n" // constants
|
"movi v21.4h, #3 \n" // constants
|
||||||
"movi v22.4h, #9 \n"
|
"movi v22.4h, #9 \n"
|
||||||
|
|||||||
@ -457,7 +457,7 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16* src_ptr,
|
|||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16* dst,
|
uint16* dst,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
|
|
||||||
TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
|
TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
|
||||||
SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun
|
SIMD_ALIGNED(uint16 orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun
|
||||||
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
|
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
|
||||||
@ -471,13 +471,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
|
|||||||
orig_pixels[i] = i;
|
orig_pixels[i] = i;
|
||||||
}
|
}
|
||||||
ScaleRowUp2_16_NEON(&orig_pixels[0],
|
ScaleRowUp2_16_NEON(&orig_pixels[0],
|
||||||
640 * 2,
|
640,
|
||||||
&dst_pixels_c[0],
|
&dst_pixels_c[0],
|
||||||
1280);
|
1280);
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
||||||
ScaleRowUp2_16_NEON(&orig_pixels[0],
|
ScaleRowUp2_16_NEON(&orig_pixels[0],
|
||||||
640 * 2,
|
640,
|
||||||
&dst_pixels_opt[0],
|
&dst_pixels_opt[0],
|
||||||
1280);
|
1280);
|
||||||
}
|
}
|
||||||
@ -488,42 +488,57 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
|
|||||||
EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
|
EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
|
||||||
EXPECT_EQ(dst_pixels_c[1279], 800);
|
EXPECT_EQ(dst_pixels_c[1279], 800);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
|
extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16* dst,
|
uint16* dst,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
|
|
||||||
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
|
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
|
||||||
SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]);
|
SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]);
|
||||||
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
|
|
||||||
SIMD_ALIGNED(uint16 dst_pixels_c[1280]);
|
SIMD_ALIGNED(uint16 dst_pixels_c[1280]);
|
||||||
|
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
|
||||||
|
|
||||||
memset(orig_pixels, 0, sizeof(orig_pixels));
|
memset(orig_pixels, 0, sizeof(orig_pixels));
|
||||||
memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
|
memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
|
||||||
memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
|
memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
|
||||||
|
|
||||||
for (int i = 0; i < 2560 * 2; ++i) {
|
for (int i = 0; i < 2560 * 2; ++i) {
|
||||||
orig_pixels[i] = i;
|
orig_pixels[i] = i;
|
||||||
}
|
}
|
||||||
ScaleRowDown2Box_16_NEON(&orig_pixels[0],
|
ScaleRowDown2Box_16_C(&orig_pixels[0],
|
||||||
2560 * 2,
|
2560,
|
||||||
&dst_pixels_c[0],
|
&dst_pixels_c[0],
|
||||||
1280);
|
1280);
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
||||||
ScaleRowDown2Box_16_NEON(&orig_pixels[0],
|
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||||
2560 * 2,
|
int has_neon = TestCpuFlag(kCpuHasNEON);
|
||||||
&dst_pixels_opt[0],
|
if (has_neon) {
|
||||||
1280);
|
ScaleRowDown2Box_16_NEON(&orig_pixels[0],
|
||||||
|
2560,
|
||||||
|
&dst_pixels_opt[0],
|
||||||
|
1280);
|
||||||
|
} else {
|
||||||
|
ScaleRowDown2Box_16_C(&orig_pixels[0],
|
||||||
|
2560,
|
||||||
|
&dst_pixels_opt[0],
|
||||||
|
1280);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
ScaleRowDown2Box_16_C(&orig_pixels[0],
|
||||||
|
2560,
|
||||||
|
&dst_pixels_opt[0],
|
||||||
|
1280);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 1280; ++i) {
|
for (int i = 0; i < 1280; ++i) {
|
||||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
||||||
}
|
}
|
||||||
EXPECT_EQ(dst_pixels_c[0], 1281);
|
|
||||||
|
EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
|
||||||
EXPECT_EQ(dst_pixels_c[1279], 3839);
|
EXPECT_EQ(dst_pixels_c[1279], 3839);
|
||||||
}
|
}
|
||||||
#endif // __aarch64__
|
|
||||||
|
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user