Fix clang 17 -flto=thin bug in Neon YUVtoRGB and ARGBToRGB565Dither

- YUV to RGB (AArch32): rewind the kRGBCoeffBias pointer after the setup loads post-increment it
- ARGBToRGB565Dither: declare width and the source pointer as modified (read-write asm operands)


Bug: chromium:1424089
Change-Id: I987180652331bab16ce27d8d166399a687ee890e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4370099
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2023-03-24 02:18:55 -07:00 committed by libyuv LUCI CQ
parent 3f219a3501
commit 1a971f8cc3
2 changed files with 23 additions and 22 deletions

View File

@ -91,10 +91,11 @@ extern "C" {
// YUVTORGB_SETUP: AArch32 NEON inline-asm fragment that loads the YUV to RGB
// constants into NEON registers.
//  - vld4.8 reads four consecutive bytes at kUVCoeff and replicates one of
//    them into every lane of each of d26, d27, d28 and d29.
//  - Each vld1.16 reads one 16-bit value at kRGBCoeffBias and replicates it
//    across its destination: d31, then d20/d21, d22/d23 and d24/d25. The
//    first three loads post-increment the pointer ("!").
//  - The trailing sub moves kRGBCoeffBias back, because the post-increments
//    modify that operand.
// NOTE(review): three post-incremented 16-bit single-element loads should
// advance the pointer by 6 bytes, while the sub rewinds by 10 -- confirm the
// rewind amount against the ARM VLD1 (single element to all lanes) writeback
// rules.
#define YUVTORGB_SETUP \
"vld4.8 {d26[], d27[], d28[], d29[]}, [%[kUVCoeff]] \n" \
"vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n"
"vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n" \
"sub %[kRGBCoeffBias], %[kRGBCoeffBias], #10 \n"
// q0: B uint16x8_t
// q1: G uint16x8_t
@ -1754,20 +1755,20 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
const uint32_t dither4,
int width) {
asm volatile(
"vdup.32 d7, %2 \n" // dither4
"vdup.32 d7, %3 \n" // dither4
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%1]! \n" // load 8 pixels of ARGB.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vqadd.u8 d0, d0, d7 \n"
"vqadd.u8 d2, d2, d7 \n"
"vqadd.u8 d4, d4, d7 \n" // add for dither
ARGBTORGB565
"vst1.8 {q2}, [%0]! \n" // store 8 RGB565.
"vst1.8 {q2}, [%1]! \n" // store 8 RGB565.
"bgt 1b \n"
: "+r"(dst_rgb) // %0
: "r"(src_argb), // %1
"r"(dither4), // %2
"r"(width) // %3
: "+r"(src_argb), // %0
"+r"(dst_rgb), // %1
"+r"(width) // %2
: "r"(dither4) // %3
: "cc", "memory", "q0", "q1", "q2", "q3");
}

View File

@ -1982,21 +1982,21 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
const uint32_t dither4,
int width) {
asm volatile(
"dup v1.4s, %w2 \n" // dither4
"dup v1.4s, %w3 \n" // dither4
"1: \n"
"ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // load 8
// pixels
"subs %w3, %w3, #8 \n" // 8 processed per loop.
"ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"uqadd v16.8b, v16.8b, v1.8b \n"
"prfm pldl1keep, [%0, 448] \n"
"uqadd v17.8b, v17.8b, v1.8b \n"
"uqadd v18.8b, v18.8b, v1.8b \n" ARGBTORGB565
"st1 {v18.16b}, [%0], #16 \n" // store 8 pixels RGB565.
"uqadd v18.8b, v18.8b, v1.8b \n"
ARGBTORGB565
"st1 {v18.16b}, [%1], #16 \n" // store 8 pixels RGB565.
"b.gt 1b \n"
: "+r"(dst_rgb) // %0
: "r"(src_argb), // %1
"r"(dither4), // %2
"r"(width) // %3
: "+r"(src_argb), // %0
"+r"(dst_rgb), // %1
"+r"(width) // %2
: "r"(dither4) // %3
: "cc", "memory", "v1", "v16", "v17", "v18", "v19");
}