Change ARGBMultiplyRow_C to match Neon

The existing C implementation does not round correctly in all cases, so
adjust it to compute (value * scale + 128) >> 8 per channel, matching the
existing Neon implementation.

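Update the tests to require bit-exactness (max_diff of 0 rather than 1) and
disable the other implementations that do not round correctly (SSE2, AVX2,
MSA, LSX and LASX) until their rounding behaviour is fixed.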

Change-Id: Ie790fb4b4805b555d74d689d83802e1dd4f33df5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5869115
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
George Steed 2024-09-17 11:39:37 +01:00 committed by Frank Barchard
parent a37e6bc81b
commit 02c6e8baca
3 changed files with 22 additions and 22 deletions

View File

@ -106,7 +106,8 @@ extern "C" {
#define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2 #define HAS_ARGBMIRRORROW_SSE2
#define HAS_ARGBMULTIPLYROW_SSE2 // TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2 #define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2 #define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSEPIAROW_SSSE3
@ -186,7 +187,8 @@ extern "C" {
// Effects: // Effects:
#define HAS_ARGBADDROW_AVX2 #define HAS_ARGBADDROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2 // TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_BLENDPLANEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2
@ -585,7 +587,8 @@ extern "C" {
#define HAS_ARGBEXTRACTALPHAROW_MSA #define HAS_ARGBEXTRACTALPHAROW_MSA
#define HAS_ARGBGRAYROW_MSA #define HAS_ARGBGRAYROW_MSA
#define HAS_ARGBMIRRORROW_MSA #define HAS_ARGBMIRRORROW_MSA
#define HAS_ARGBMULTIPLYROW_MSA // TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_MSA
#define HAS_ARGBQUANTIZEROW_MSA #define HAS_ARGBQUANTIZEROW_MSA
#define HAS_ARGBSEPIAROW_MSA #define HAS_ARGBSEPIAROW_MSA
#define HAS_ARGBSETROW_MSA #define HAS_ARGBSETROW_MSA
@ -684,7 +687,8 @@ extern "C" {
#define HAS_ARGBTOUVROW_LSX #define HAS_ARGBTOUVROW_LSX
#define HAS_ARGBTOYJROW_LSX #define HAS_ARGBTOYJROW_LSX
#define HAS_ARGBMIRRORROW_LSX #define HAS_ARGBMIRRORROW_LSX
#define HAS_ARGBMULTIPLYROW_LSX // TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_LSX
#define HAS_BGRATOUVROW_LSX #define HAS_BGRATOUVROW_LSX
#define HAS_BGRATOYROW_LSX #define HAS_BGRATOYROW_LSX
#define HAS_I400TOARGBROW_LSX #define HAS_I400TOARGBROW_LSX
@ -751,7 +755,8 @@ extern "C" {
#define HAS_ARGBATTENUATEROW_LASX #define HAS_ARGBATTENUATEROW_LASX
#define HAS_ARGBGRAYROW_LASX #define HAS_ARGBGRAYROW_LASX
#define HAS_ARGBMIRRORROW_LASX #define HAS_ARGBMIRRORROW_LASX
#define HAS_ARGBMULTIPLYROW_LASX // TODO: Re-enable once rounding behaviour is fixed.
// #define HAS_ARGBMULTIPLYROW_LASX
#define HAS_ARGBSEPIAROW_LASX #define HAS_ARGBSEPIAROW_LASX
#define HAS_ARGBSHADEROW_LASX #define HAS_ARGBSHADEROW_LASX
#define HAS_ARGBSHUFFLEROW_LASX #define HAS_ARGBSHUFFLEROW_LASX

View File

@ -1375,34 +1375,29 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
#undef REPEAT8 #undef REPEAT8
#undef SHADE #undef SHADE
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
void ARGBMultiplyRow_C(const uint8_t* src_argb, void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width) { int width) {
int i; int i;
for (i = 0; i < width; ++i) { for (i = 0; i < width; ++i) {
const uint32_t b = REPEAT8(src_argb[0]); const uint32_t b = src_argb[0];
const uint32_t g = REPEAT8(src_argb[1]); const uint32_t g = src_argb[1];
const uint32_t r = REPEAT8(src_argb[2]); const uint32_t r = src_argb[2];
const uint32_t a = REPEAT8(src_argb[3]); const uint32_t a = src_argb[3];
const uint32_t b_scale = src_argb1[0]; const uint32_t b_scale = src_argb1[0];
const uint32_t g_scale = src_argb1[1]; const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2]; const uint32_t r_scale = src_argb1[2];
const uint32_t a_scale = src_argb1[3]; const uint32_t a_scale = src_argb1[3];
dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale)); dst_argb[0] = STATIC_CAST(uint8_t, (b * b_scale + 128) >> 8);
dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale)); dst_argb[1] = STATIC_CAST(uint8_t, (g * g_scale + 128) >> 8);
dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale)); dst_argb[2] = STATIC_CAST(uint8_t, (r * r_scale + 128) >> 8);
dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale)); dst_argb[3] = STATIC_CAST(uint8_t, (a * a_scale + 128) >> 8);
src_argb += 4; src_argb += 4;
src_argb1 += 4; src_argb1 += 4;
dst_argb += 4; dst_argb += 4;
} }
} }
#undef REPEAT8
#undef SHADE
#define SHADE(f, v) clamp255(v + f) #define SHADE(f, v) clamp255(v + f)

View File

@ -1906,28 +1906,28 @@ TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_, int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_, benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0); benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1); EXPECT_LE(max_diff, 0);
} }
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
int max_diff = int max_diff =
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1); disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
EXPECT_LE(max_diff, 1); EXPECT_LE(max_diff, 0);
} }
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
int max_diff = int max_diff =
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0); disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
EXPECT_LE(max_diff, 1); EXPECT_LE(max_diff, 0);
} }
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
int max_diff = int max_diff =
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 1); EXPECT_LE(max_diff, 0);
} }
static int TestAdd(int width, static int TestAdd(int width,