diff --git a/source/scale.cc b/source/scale.cc index 5481a7e6e..9c1e9b264 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -815,9 +815,8 @@ static void ScaleAddCols2_C(int dst_width, boxwidth = MIN1((x >> 16) - ix); int scaletbl_index = boxwidth - minboxwidth; assert((scaletbl_index == 0) || (scaletbl_index == 1)); - *dst_ptr++ = (uint8_t)((SumPixels(boxwidth, src_ptr + ix) * - scaletbl[scaletbl_index] + - 32768) >> + *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) * + scaletbl[scaletbl_index] >> 16); } } @@ -841,9 +840,7 @@ static void ScaleAddCols2_16_C(int dst_width, int scaletbl_index = boxwidth - minboxwidth; assert((scaletbl_index == 0) || (scaletbl_index == 1)); *dst_ptr++ = - (SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[scaletbl_index] + - 32768) >> - 16; + SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[scaletbl_index] >> 16; } } @@ -858,7 +855,7 @@ static void ScaleAddCols0_C(int dst_width, (void)dx; src_ptr += (x >> 16); for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = (uint8_t)((src_ptr[i] * scaleval + 32768) >> 16); + *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16); } } @@ -873,8 +870,7 @@ static void ScaleAddCols1_C(int dst_width, int i; x >>= 16; for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = - (uint8_t)((SumPixels(boxwidth, src_ptr + x) * scaleval + 32768) >> 16); + *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16); x += boxwidth; } } @@ -889,7 +885,7 @@ static void ScaleAddCols1_16_C(int dst_width, int scaleval = 65536 / (boxwidth * boxheight); int i; for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = (SumPixels_16(boxwidth, src_ptr + x) * scaleval + 32768) >> 16; + *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; x += boxwidth; } } diff --git a/source/scale_common.cc b/source/scale_common.cc index c82fd41dd..537f030aa 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -893,29 +893,23 @@ void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = - (uint8_t)(((src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[src_stride + 0] + src_ptr[src_stride + 1] + - src_ptr[src_stride + 2] + src_ptr[src_stride * 2 + 0] + - src_ptr[src_stride * 2 + 1] + src_ptr[src_stride * 2 + 2]) * - (65536 / 9) + - 32768) >> - 16); - dst_ptr[1] = - (uint8_t)(((src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[src_stride + 3] + src_ptr[src_stride + 4] + - src_ptr[src_stride + 5] + src_ptr[src_stride * 2 + 3] + - src_ptr[src_stride * 2 + 4] + src_ptr[src_stride * 2 + 5]) * - (65536 / 9) + - 32768) >> - 16); - dst_ptr[2] = - (uint8_t)(((src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + - src_ptr[src_stride + 7] + src_ptr[src_stride * 2 + 6] + - src_ptr[src_stride * 2 + 7]) * - (65536 / 6) + - 32768) >> - 16); + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + + src_ptr[src_stride + 0] + src_ptr[src_stride + 1] + + src_ptr[src_stride + 2] + src_ptr[src_stride * 2 + 0] + + src_ptr[src_stride * 2 + 1] + src_ptr[src_stride * 2 + 2]) * + (65536 / 9) >> + 16; + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + + src_ptr[src_stride + 3] + src_ptr[src_stride + 4] + + src_ptr[src_stride + 5] + src_ptr[src_stride * 2 + 3] + + src_ptr[src_stride * 2 + 4] + src_ptr[src_stride * 2 + 5]) * + (65536 / 9) >> + 16; + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + + src_ptr[src_stride + 7] + src_ptr[src_stride * 2 + 6] + + src_ptr[src_stride * 2 + 7]) * + (65536 / 6) >> + 16; src_ptr += 8; dst_ptr += 3; } @@ -928,31 +922,23 @@ void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = - (uint16_t)(((src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[src_stride + 0] + src_ptr[src_stride + 1] + - src_ptr[src_stride + 2] + src_ptr[src_stride * 2 + 0] + - src_ptr[src_stride * 2 + 1] + - src_ptr[src_stride * 2 + 2]) * - (65536u / 9u) + - 32768u) >> - 16); - dst_ptr[1] = - (uint16_t)(((src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[src_stride + 3] + src_ptr[src_stride + 4] + - src_ptr[src_stride + 5] + src_ptr[src_stride * 2 + 3] + - src_ptr[src_stride * 2 + 4] + - src_ptr[src_stride * 2 + 5]) * - (65536u / 9u) + - 32768u) >> - 16); - dst_ptr[2] = - (uint16_t)(((src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + - src_ptr[src_stride + 7] + src_ptr[src_stride * 2 + 6] + - src_ptr[src_stride * 2 + 7]) * - (65536u / 6u) + - 32768u) >> - 16); + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + + src_ptr[src_stride + 0] + src_ptr[src_stride + 1] + + src_ptr[src_stride + 2] + src_ptr[src_stride * 2 + 0] + + src_ptr[src_stride * 2 + 1] + src_ptr[src_stride * 2 + 2]) * + (65536u / 9u) >> + 16; + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + + src_ptr[src_stride + 3] + src_ptr[src_stride + 4] + + src_ptr[src_stride + 5] + src_ptr[src_stride * 2 + 3] + + src_ptr[src_stride * 2 + 4] + src_ptr[src_stride * 2 + 5]) * + (65536u / 9u) >> + 16; + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + + src_ptr[src_stride + 7] + src_ptr[src_stride * 2 + 6] + + src_ptr[src_stride * 2 + 7]) * + (65536u / 6u) >> + 16; src_ptr += 8; dst_ptr += 3; } @@ -966,23 +952,20 @@ void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = (uint8_t)(((src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[src_stride + 0] + src_ptr[src_stride + 1] + - src_ptr[src_stride + 2]) * - (65536 / 6) + - 32768) >> - 16); - dst_ptr[1] = (uint8_t)(((src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[src_stride + 3] + src_ptr[src_stride + 4] + - src_ptr[src_stride + 5]) * - (65536 / 6) + - 32768) >> - 16); - dst_ptr[2] = (uint8_t)(((src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + - src_ptr[src_stride + 7]) * - (65536 / 4) + - 32768) >> - 16); + dst_ptr[0] = + (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[src_stride + 0] + + src_ptr[src_stride + 1] + src_ptr[src_stride + 2]) * + (65536 / 6) >> + 16; + dst_ptr[1] = + (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[src_stride + 3] + + src_ptr[src_stride + 4] + src_ptr[src_stride + 5]) * + (65536 / 6) >> + 16; + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + + src_ptr[src_stride + 7]) * + (65536 / 4) >> + 16; src_ptr += 8; dst_ptr += 3; } @@ -996,25 +979,19 @@ void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { dst_ptr[0] = - (uint16_t)(((src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[src_stride + 0] + src_ptr[src_stride + 1] + - src_ptr[src_stride + 2]) * - (65536u / 6u) + - 32768u) >> - 16); + (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[src_stride + 0] + + src_ptr[src_stride + 1] + src_ptr[src_stride + 2]) * + (65536u / 6u) >> + 16; dst_ptr[1] = - (uint16_t)(((src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[src_stride + 3] + src_ptr[src_stride + 4] + - src_ptr[src_stride + 5]) * - (65536u / 6u) + - 32768u) >> - 16); - dst_ptr[2] = - (uint16_t)(((src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + - src_ptr[src_stride + 7]) * - (65536u / 4u) + - 32768u) >> - 16); + (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[src_stride + 3] + + src_ptr[src_stride + 4] + src_ptr[src_stride + 5]) * + (65536u / 6u) >> + 16; + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[src_stride + 6] + + src_ptr[src_stride + 7]) * + (65536u / 4u) >> + 16; src_ptr += 8; dst_ptr += 3; } diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc index b313e854d..6a2524230 100644 --- a/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -693,11 +693,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, "paddusw %%xmm6,%%xmm1 \n" "pshufb %%xmm4,%%xmm0 \n" "paddusw %%xmm0,%%xmm1 \n" - "movdqa %%xmm1,%%xmm6 \n" "pmulhuw %%xmm5,%%xmm1 \n" - "pmullw %%xmm5,%%xmm6 \n" - "psraw $15,%%xmm6 \n" - "psubw %%xmm6,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movd %%xmm1,(%1) \n" "psrlq $0x10,%%xmm1 \n" @@ -758,11 +754,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, "paddusw %%xmm1,%%xmm7 \n" "pshufb %%xmm3,%%xmm7 \n" "paddusw %%xmm7,%%xmm6 \n" - "movdqa %%xmm6,%%xmm7 \n" "pmulhuw %%xmm4,%%xmm6 \n" - "pmullw %%xmm4,%%xmm7 \n" - "psraw $15,%%xmm7 \n" - "psubw %%xmm7,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movd %%xmm6,(%1) \n" "psrlq $0x10,%%xmm6 \n" diff --git a/source/scale_win.cc b/source/scale_win.cc index 0e47e9615..32c0506fa 100644 --- a/source/scale_win.cc +++ b/source/scale_win.cc @@ -748,11 +748,7 @@ __declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, pshufb xmm7, xmm3 paddusw xmm6, xmm7 - movdqa xmm7, xmm6 - pmulhuw xmm6, xmm4 - pmullw xmm7, xmm4 - psraw xmm7, 15 - psubw xmm6, xmm7 // divide by 9,9,6, 9,9,6 with round + pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6 packuswb xmm6, xmm6 movd [edx], xmm6 // write 6 pixels @@ -797,11 +793,7 @@ __declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, pshufb xmm0, xmm4 paddusw xmm1, xmm0 - movdqa xmm6, xmm1 - pmulhuw xmm1, xmm5 - pmullw xmm6, xmm5 - psraw xmm6, 15 - psubw xmm1, xmm6 // divide by 3,3,2, 3,3,2 with round + pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2 packuswb xmm1, xmm1 movd [edx], xmm1 // write 6 pixels diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc index c21b831ec..979c70aad 100644 --- a/unit_test/scale_plane_test.cc +++ b/unit_test/scale_plane_test.cc @@ -15,7 +15,6 @@ #include #include -#include #include "../unit_test/unit_test.h" #include "libyuv/cpu_id.h" @@ -43,11 +42,6 @@ namespace libyuv { -namespace { -using ::testing::Combine; -using ::testing::Values; -} // namespace - #ifdef ENABLE_ROW_TESTS #ifdef HAS_SCALEROWDOWN2_SSSE3 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { @@ -539,156 +533,5 @@ TEST_F(LibYUVScaleTest, ScalePlaneVertical_IntStrideOverflow) { delete[] src; delete[] dst; } -struct ScaleTestParams { - std::vector> src_data; - std::vector> dst_data; - FilterMode filter_mode; -}; - -class ScalePlaneTest - : public LibYUVScaleTest, - public ::testing::WithParamInterface> { -}; - -TEST_P(ScalePlaneTest, Mismatch) { - const ScaleTestParams& params = std::get<0>(GetParam()); - bool use_c_only = std::get<1>(GetParam()); - const int src_height = params.src_data.size(); - const int src_width = params.src_data[0].size(); - const int dst_height = params.dst_data.size(); - const int dst_width = params.dst_data[0].size(); - const int size = src_width * src_height; - - align_buffer_page_end(src_pixels, size); - align_buffer_page_end(dst_pixels, dst_width * dst_height); - - for (int y = 0; y < src_height; ++y) { - for (int x = 0; x < src_width; ++x) { - src_pixels[y * src_width + x] = - static_cast(params.src_data[y][x]); - } - } - memset(dst_pixels, 0, dst_width * dst_height); - - MaskCpuFlags(use_c_only ? disable_cpu_flags_ : benchmark_cpu_info_); - - ScalePlane(src_pixels, src_width, src_width, src_height, dst_pixels, - dst_width, dst_width, dst_height, params.filter_mode); - - bool has_mismatch = false; - for (int y = 0; y < dst_height; ++y) { - for (int x = 0; x < dst_width; ++x) { - if (dst_pixels[y * dst_width + x] != - static_cast(params.dst_data[y][x])) { - has_mismatch = true; - break; - } - } - if (has_mismatch) - break; - } - EXPECT_FALSE(has_mismatch); - - free_aligned_buffer_page_end(dst_pixels); - free_aligned_buffer_page_end(src_pixels); -} - -TEST_P(ScalePlaneTest, Mismatch16) { - const ScaleTestParams& params = std::get<0>(GetParam()); - bool use_c_only = std::get<1>(GetParam()); - const int src_height = params.src_data.size(); - const int src_width = params.src_data[0].size(); - const int dst_height = params.dst_data.size(); - const int dst_width = params.dst_data[0].size(); - const int size = src_width * src_height; - - align_buffer_page_end(src_pixels, size * 2); - align_buffer_page_end(dst_pixels, dst_width * dst_height * 2); - - uint16_t* src_ptr = reinterpret_cast(src_pixels); - uint16_t* dst_ptr = reinterpret_cast(dst_pixels); - - for (int y = 0; y < src_height; ++y) { - for (int x = 0; x < src_width; ++x) { - src_ptr[y * src_width + x] = static_cast(params.src_data[y][x]); - } - } - memset(dst_ptr, 0, dst_width * dst_height * 2); - - MaskCpuFlags(use_c_only ? disable_cpu_flags_ : benchmark_cpu_info_); - - ScalePlane_16(src_ptr, src_width, src_width, src_height, dst_ptr, dst_width, - dst_width, dst_height, params.filter_mode); - - bool has_mismatch = false; - for (int y = 0; y < dst_height; ++y) { - for (int x = 0; x < dst_width; ++x) { - if (dst_ptr[y * dst_width + x] != - static_cast(params.dst_data[y][x])) { - has_mismatch = true; - break; - } - } - if (has_mismatch) - break; - } - EXPECT_FALSE(has_mismatch); - - free_aligned_buffer_page_end(dst_pixels); - free_aligned_buffer_page_end(src_pixels); -} - -INSTANTIATE_TEST_SUITE_P( - All, - ScalePlaneTest, - Combine( - Values( - // ScaleRowDown2Box - ScaleTestParams{.src_data = {{1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}}, - .dst_data = {{2, 2, 2, 2}, - {2, 2, 2, 2}, - {2, 2, 2, 2}, - {2, 2, 2, 2}}, - .filter_mode = kFilterBox}, - // ScaleRowDown4Box - ScaleTestParams{.src_data = {{1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}, - {1, 3, 1, 3, 1, 3, 1, 3}}, - .dst_data = {{2, 2}, {2, 2}}, - .filter_mode = kFilterBox}, - // ScaleRowDown38Box - ScaleTestParams{ - .src_data = - std::vector>(8, std::vector(8, 1)), - .dst_data = - std::vector>(3, std::vector(3, 1)), - .filter_mode = kFilterBox}, - // ScaleAddCols1_C (1/3) - ScaleTestParams{ - .src_data = - std::vector>(6, std::vector(6, 1)), - .dst_data = - std::vector>(2, std::vector(2, 1)), - .filter_mode = kFilterBox}, - // ScaleAddCols2_C (5/12) - ScaleTestParams{ - .src_data = - std::vector>(12, std::vector(12, 1)), - .dst_data = - std::vector>(5, std::vector(5, 1)), - .filter_mode = kFilterBox}), - Values(false, true))); } // namespace libyuv