Change ARGBMultiplyRow_C to match Neon

The existing behaviour does not round correctly in all cases, so adjust
it to match the existing Neon implementation.

Update the tests to require bit-exactness and disable other
implementations that do not round correctly.

Change-Id: Ie790fb4b4805b555d74d689d83802e1dd4f33df5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5869115
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
George Steed 2024-09-17 11:39:37 +01:00 committed by Frank Barchard
parent a37e6bc81b
commit 02c6e8baca
3 changed files with 22 additions and 22 deletions

View File

@ -106,7 +106,8 @@ extern "C" {
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2
#define HAS_ARGBMULTIPLYROW_SSE2
// TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
@ -186,7 +187,8 @@ extern "C" {
// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
// TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_BLENDPLANEROW_AVX2
@ -585,7 +587,8 @@ extern "C" {
#define HAS_ARGBEXTRACTALPHAROW_MSA
#define HAS_ARGBGRAYROW_MSA
#define HAS_ARGBMIRRORROW_MSA
#define HAS_ARGBMULTIPLYROW_MSA
// TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_MSA
#define HAS_ARGBQUANTIZEROW_MSA
#define HAS_ARGBSEPIAROW_MSA
#define HAS_ARGBSETROW_MSA
@ -684,7 +687,8 @@ extern "C" {
#define HAS_ARGBTOUVROW_LSX
#define HAS_ARGBTOYJROW_LSX
#define HAS_ARGBMIRRORROW_LSX
#define HAS_ARGBMULTIPLYROW_LSX
// TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_LSX
#define HAS_BGRATOUVROW_LSX
#define HAS_BGRATOYROW_LSX
#define HAS_I400TOARGBROW_LSX
@ -751,7 +755,8 @@ extern "C" {
#define HAS_ARGBATTENUATEROW_LASX
#define HAS_ARGBGRAYROW_LASX
#define HAS_ARGBMIRRORROW_LASX
#define HAS_ARGBMULTIPLYROW_LASX
// TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_LASX
#define HAS_ARGBSEPIAROW_LASX
#define HAS_ARGBSHADEROW_LASX
#define HAS_ARGBSHUFFLEROW_LASX

View File

@ -1375,34 +1375,29 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
#undef REPEAT8
#undef SHADE
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
// Multiplies two rows of 4-byte pixels byte by byte: for each of `width`
// pixels, the four bytes of src_argb (named b, g, r, a below) are combined
// with the matching bytes of src_argb1 (the *_scale values) and the results
// are stored to dst_argb. All three pointers advance 4 bytes per iteration.
//
// NOTE(review): this listing appears to be a diff rendered without +/-
// markers, so b/g/r/a are declared twice and every dst_argb byte is stored
// twice. Going by the commit message, the REPEAT8/SHADE lines are presumably
// the version being removed and the `(x * scale + 128) >> 8` lines the
// replacement -- confirm against the repository before compiling this as-is.
void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
// First set of loads: REPEAT8 (defined just above this function) ORs the
// byte with itself shifted left by 8, giving a 16-bit value v * 0x101.
const uint32_t b = REPEAT8(src_argb[0]);
const uint32_t g = REPEAT8(src_argb[1]);
const uint32_t r = REPEAT8(src_argb[2]);
const uint32_t a = REPEAT8(src_argb[3]);
// Second set of loads: the plain 8-bit byte values, widened to uint32_t.
const uint32_t b = src_argb[0];
const uint32_t g = src_argb[1];
const uint32_t r = src_argb[2];
const uint32_t a = src_argb[3];
const uint32_t b_scale = src_argb1[0];
const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2];
const uint32_t a_scale = src_argb1[3];
// First set of stores: SHADE (defined just above this function) multiplies
// its two arguments and shifts right by 16, i.e. the product is truncated.
dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
// Second set of stores: 128 is half of 256, so adding it before the shift
// by 8 rounds product / 256 to the nearest integer instead of truncating.
dst_argb[0] = STATIC_CAST(uint8_t, (b * b_scale + 128) >> 8);
dst_argb[1] = STATIC_CAST(uint8_t, (g * g_scale + 128) >> 8);
dst_argb[2] = STATIC_CAST(uint8_t, (r * r_scale + 128) >> 8);
dst_argb[3] = STATIC_CAST(uint8_t, (a * a_scale + 128) >> 8);
// Step all three rows forward by one 4-byte pixel.
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
}
#undef REPEAT8
#undef SHADE
#define SHADE(f, v) clamp255(v + f)

View File

@ -1906,28 +1906,28 @@ TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
EXPECT_LE(max_diff, 0);
}
// Runs TestMultiply at the benchmark width and height with a final argument
// of 1 (the other ARGBMultiply tests visible here pass 0) and checks the
// returned max_diff. TestMultiply itself is not shown in this listing.
// NOTE(review): both EXPECT_LE lines appear because this listing is a diff
// shown without +/- markers; the commit message says the tests now require
// bit-exactness, so `<= 0` is presumably the line being kept -- confirm.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
int max_diff =
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
EXPECT_LE(max_diff, 1);
EXPECT_LE(max_diff, 0);
}
// Runs TestMultiply at the benchmark width and height with -1 as the sixth
// argument (the other ARGBMultiply tests visible here pass +1) and checks
// the returned max_diff. TestMultiply itself is not shown in this listing.
// NOTE(review): both EXPECT_LE lines appear because this listing is a diff
// shown without +/- markers; the commit message says the tests now require
// bit-exactness, so `<= 0` is presumably the line being kept -- confirm.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
int max_diff =
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
EXPECT_LE(max_diff, 1);
EXPECT_LE(max_diff, 0);
}
// Runs TestMultiply at the benchmark width and height with +1 and 0 as the
// last two arguments and checks the returned max_diff. TestMultiply itself
// is not shown in this listing.
// NOTE(review): both EXPECT_LE lines appear because this listing is a diff
// shown without +/- markers; the commit message says the tests now require
// bit-exactness, so `<= 0` is presumably the line being kept -- confirm.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
int max_diff =
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1);
EXPECT_LE(max_diff, 0);
}
static int TestAdd(int width,