mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Change ARGBMultiplyRow_C to match Neon
The existing behaviour does not round correctly in all cases, so adjust it to match the existing Neon implementation. Update the tests to require bit-exactness and disable other implementations that do not round correctly.

Change-Id: Ie790fb4b4805b555d74d689d83802e1dd4f33df5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5869115
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
a37e6bc81b
commit
02c6e8baca
@ -106,7 +106,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBGRAYROW_SSSE3
|
#define HAS_ARGBGRAYROW_SSSE3
|
||||||
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
|
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
|
||||||
#define HAS_ARGBMIRRORROW_SSE2
|
#define HAS_ARGBMIRRORROW_SSE2
|
||||||
#define HAS_ARGBMULTIPLYROW_SSE2
|
// TODO: Re-enable once rounding behaviour is fixed.
|
||||||
|
// #define HAS_ARGBMULTIPLYROW_SSE2
|
||||||
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
||||||
#define HAS_ARGBQUANTIZEROW_SSE2
|
#define HAS_ARGBQUANTIZEROW_SSE2
|
||||||
#define HAS_ARGBSEPIAROW_SSSE3
|
#define HAS_ARGBSEPIAROW_SSSE3
|
||||||
@ -186,7 +187,8 @@ extern "C" {
|
|||||||
|
|
||||||
// Effects:
|
// Effects:
|
||||||
#define HAS_ARGBADDROW_AVX2
|
#define HAS_ARGBADDROW_AVX2
|
||||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
// TODO: Re-enable once rounding behaviour is fixed.
|
||||||
|
// #define HAS_ARGBMULTIPLYROW_AVX2
|
||||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||||
#define HAS_BLENDPLANEROW_AVX2
|
#define HAS_BLENDPLANEROW_AVX2
|
||||||
|
|
||||||
@ -585,7 +587,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBEXTRACTALPHAROW_MSA
|
#define HAS_ARGBEXTRACTALPHAROW_MSA
|
||||||
#define HAS_ARGBGRAYROW_MSA
|
#define HAS_ARGBGRAYROW_MSA
|
||||||
#define HAS_ARGBMIRRORROW_MSA
|
#define HAS_ARGBMIRRORROW_MSA
|
||||||
#define HAS_ARGBMULTIPLYROW_MSA
|
// TODO: Re-enable once rounding behaviour is fixed.
|
||||||
|
// #define HAS_ARGBMULTIPLYROW_MSA
|
||||||
#define HAS_ARGBQUANTIZEROW_MSA
|
#define HAS_ARGBQUANTIZEROW_MSA
|
||||||
#define HAS_ARGBSEPIAROW_MSA
|
#define HAS_ARGBSEPIAROW_MSA
|
||||||
#define HAS_ARGBSETROW_MSA
|
#define HAS_ARGBSETROW_MSA
|
||||||
@ -684,7 +687,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBTOUVROW_LSX
|
#define HAS_ARGBTOUVROW_LSX
|
||||||
#define HAS_ARGBTOYJROW_LSX
|
#define HAS_ARGBTOYJROW_LSX
|
||||||
#define HAS_ARGBMIRRORROW_LSX
|
#define HAS_ARGBMIRRORROW_LSX
|
||||||
#define HAS_ARGBMULTIPLYROW_LSX
|
// TODO: Re-enable once rounding behaviour is fixed.
|
||||||
|
// #define HAS_ARGBMULTIPLYROW_LSX
|
||||||
#define HAS_BGRATOUVROW_LSX
|
#define HAS_BGRATOUVROW_LSX
|
||||||
#define HAS_BGRATOYROW_LSX
|
#define HAS_BGRATOYROW_LSX
|
||||||
#define HAS_I400TOARGBROW_LSX
|
#define HAS_I400TOARGBROW_LSX
|
||||||
@ -751,7 +755,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBATTENUATEROW_LASX
|
#define HAS_ARGBATTENUATEROW_LASX
|
||||||
#define HAS_ARGBGRAYROW_LASX
|
#define HAS_ARGBGRAYROW_LASX
|
||||||
#define HAS_ARGBMIRRORROW_LASX
|
#define HAS_ARGBMIRRORROW_LASX
|
||||||
#define HAS_ARGBMULTIPLYROW_LASX
|
// TODO: Re-enable once rounding behaviour is fixed.
|
||||||
|
// #define HAS_ARGBMULTIPLYROW_LASX
|
||||||
#define HAS_ARGBSEPIAROW_LASX
|
#define HAS_ARGBSEPIAROW_LASX
|
||||||
#define HAS_ARGBSHADEROW_LASX
|
#define HAS_ARGBSHADEROW_LASX
|
||||||
#define HAS_ARGBSHUFFLEROW_LASX
|
#define HAS_ARGBSHUFFLEROW_LASX
|
||||||
|
|||||||
@ -1375,34 +1375,29 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
|
|||||||
#undef REPEAT8
|
#undef REPEAT8
|
||||||
#undef SHADE
|
#undef SHADE
|
||||||
|
|
||||||
#define REPEAT8(v) (v) | ((v) << 8)
|
|
||||||
#define SHADE(f, v) v* f >> 16
|
|
||||||
|
|
||||||
void ARGBMultiplyRow_C(const uint8_t* src_argb,
|
void ARGBMultiplyRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < width; ++i) {
|
for (i = 0; i < width; ++i) {
|
||||||
const uint32_t b = REPEAT8(src_argb[0]);
|
const uint32_t b = src_argb[0];
|
||||||
const uint32_t g = REPEAT8(src_argb[1]);
|
const uint32_t g = src_argb[1];
|
||||||
const uint32_t r = REPEAT8(src_argb[2]);
|
const uint32_t r = src_argb[2];
|
||||||
const uint32_t a = REPEAT8(src_argb[3]);
|
const uint32_t a = src_argb[3];
|
||||||
const uint32_t b_scale = src_argb1[0];
|
const uint32_t b_scale = src_argb1[0];
|
||||||
const uint32_t g_scale = src_argb1[1];
|
const uint32_t g_scale = src_argb1[1];
|
||||||
const uint32_t r_scale = src_argb1[2];
|
const uint32_t r_scale = src_argb1[2];
|
||||||
const uint32_t a_scale = src_argb1[3];
|
const uint32_t a_scale = src_argb1[3];
|
||||||
dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
|
dst_argb[0] = STATIC_CAST(uint8_t, (b * b_scale + 128) >> 8);
|
||||||
dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
|
dst_argb[1] = STATIC_CAST(uint8_t, (g * g_scale + 128) >> 8);
|
||||||
dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
|
dst_argb[2] = STATIC_CAST(uint8_t, (r * r_scale + 128) >> 8);
|
||||||
dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
|
dst_argb[3] = STATIC_CAST(uint8_t, (a * a_scale + 128) >> 8);
|
||||||
src_argb += 4;
|
src_argb += 4;
|
||||||
src_argb1 += 4;
|
src_argb1 += 4;
|
||||||
dst_argb += 4;
|
dst_argb += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef REPEAT8
|
|
||||||
#undef SHADE
|
|
||||||
|
|
||||||
#define SHADE(f, v) clamp255(v + f)
|
#define SHADE(f, v) clamp255(v + f)
|
||||||
|
|
||||||
|
|||||||
@ -1906,28 +1906,28 @@ TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
|
|||||||
int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
|
int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_iterations_, disable_cpu_flags_,
|
||||||
benchmark_cpu_info_, +1, 0);
|
benchmark_cpu_info_, +1, 0);
|
||||||
EXPECT_LE(max_diff, 1);
|
EXPECT_LE(max_diff, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
|
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
|
||||||
int max_diff =
|
int max_diff =
|
||||||
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
|
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
|
||||||
EXPECT_LE(max_diff, 1);
|
EXPECT_LE(max_diff, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
|
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
|
||||||
int max_diff =
|
int max_diff =
|
||||||
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
|
||||||
EXPECT_LE(max_diff, 1);
|
EXPECT_LE(max_diff, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
|
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
|
||||||
int max_diff =
|
int max_diff =
|
||||||
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
||||||
EXPECT_LE(max_diff, 1);
|
EXPECT_LE(max_diff, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int TestAdd(int width,
|
static int TestAdd(int width,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user