From b3fd3f3f3b90bd9f2250631c8a8d635e3d881db0 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Mon, 3 Feb 2025 13:16:38 -0800 Subject: [PATCH] Fix ARGBToUV444Row_NEON - constants passed in are signed and need to be negated to positive. Bug: 394127527 Change-Id: I531e475d2ddd4583922d4abef13b9282d002dd7a Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6226854 Reviewed-by: Ben Weiss --- include/libyuv/row.h | 1 + source/row_neon.cc | 12 ++++++++---- source/row_neon64.cc | 12 ++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 9ee8af68f..d8fe2137d 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -398,6 +398,7 @@ extern "C" { #define HAS_ARGBTOUVJ444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON +// TODO: Fix ARGBTOYROW and test ARGBToI444 tests pass. #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON #define HAS_AYUVTOUVROW_NEON diff --git a/source/row_neon.cc b/source/row_neon.cc index d5e36cdef..49d7584dc 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1841,18 +1841,22 @@ static void ARGBToUV444MatrixRow_NEON( "vdup.u8 d26, d0[2] \n" // UR -0.2969 coefficient "vdup.u8 d27, d0[4] \n" // VB -0.1406 coefficient "vdup.u8 d28, d0[5] \n" // VG -0.7344 coefficient + "vneg.s8 d25, d25 \n" + "vneg.s8 d26, d26 \n" + "vneg.s8 d27, d27 \n" + "vneg.s8 d28, d28 \n" "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %3, %3, #8 \n" // 8 processed per loop. "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R + "vmlsl.u8 q2, d1, d25 \n" // G + "vmlsl.u8 q2, d2, d26 \n" // R "vmull.u8 q3, d2, d24 \n" // R - "vmlal.u8 q3, d1, d28 \n" // G - "vmlal.u8 q3, d0, d27 \n" // B + "vmlsl.u8 q3, d1, d28 \n" // G + "vmlsl.u8 q3, d0, d27 \n" // B "vaddhn.u16 d0, q2, q15 \n" // +128 -> unsigned "vaddhn.u16 d1, q3, q15 \n" // +128 -> unsigned diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 8ef9d4975..c30ef680c 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2729,19 +2729,23 @@ static void ARGBToUV444MatrixRow_NEON( "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient + "neg v25.16b, v25.16b \n" + "neg v26.16b, v26.16b \n" + "neg v27.16b, v27.16b \n" + "neg v28.16b, v28.16b \n" "movi v29.16b, #0x80 \n" // 128.5 "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w3, %w3, #8 \n" // 8 processed per loop. "umull v4.8h, v0.8b, v24.8b \n" // B - "umlal v4.8h, v1.8b, v25.8b \n" // G - "umlal v4.8h, v2.8b, v26.8b \n" // R + "umlsl v4.8h, v1.8b, v25.8b \n" // G + "umlsl v4.8h, v2.8b, v26.8b \n" // R "prfm pldl1keep, [%0, 448] \n" "umull v3.8h, v2.8b, v24.8b \n" // R - "umlal v3.8h, v1.8b, v28.8b \n" // G - "umlal v3.8h, v0.8b, v27.8b \n" // B + "umlsl v3.8h, v1.8b, v28.8b \n" // G + "umlsl v3.8h, v0.8b, v27.8b \n" // B "addhn v0.8b, v4.8h, v29.8h \n" // +128 -> unsigned "addhn v1.8b, v3.8h, v29.8h \n" // +128 -> unsigned