mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
[AArch64] Avoid unnecessary work in READYUV400
The values of the UV components in the vector are known and the vectors are never overwritten, so we can hoist the UV-specific parts of the calculation out of the loop. Reduction in runtimes for I400ToARGBRow_NEON: Cortex-A55: -10.0%, Cortex-A510: -3.7%, Cortex-A76: -19.3%, Cortex-X2: -14.4%. Bug: libyuv:976 Change-Id: I17d6de4e1790f71407e12ff84548568cc3ebbe1a Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5457434 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
ea56460300
commit
b265c311b7
@ -46,10 +46,9 @@ extern "C" {
|
||||
"zip1 v0.16b, v0.16b, v0.16b \n" \
|
||||
"prfm pldl1keep, [%[src_v], 448] \n"
|
||||
|
||||
// Read 8 Y, and set 4 U and 4 V to 128
|
||||
// Read 8 Y
|
||||
// READYUV400: asm fragment that consumes 8 bytes from the [src_y] operand and
// leaves each Y byte duplicated into a 16-bit lane of v0.
#define READYUV400 \
|
||||
"ldr d0, [%[src_y]], #8 \n" /* load 8 Y bytes into d0, then src_y += 8 */ \
|
||||
"movi v1.16b, #128 \n" /* set every byte of v1 to 128 */ \
|
||||
"prfm pldl1keep, [%[src_y], 448] \n" /* prefetch hint for src_y + 448 */ \
|
||||
"zip1 v0.16b, v0.16b, v0.16b \n" /* interleave low 8 bytes with themselves: Y0 Y0 Y1 Y1 ... */
|
||||
|
||||
@ -111,6 +110,18 @@ static const uvec8 kNV21InterleavedTable = {1, 1, 5, 5, 9, 9, 13, 13,
|
||||
"uqsub v16.8h, v16.8h, v25.8h \n" /* B */ \
|
||||
"uqsub v18.8h, v18.8h, v27.8h \n" /* R */
|
||||
|
||||
// Convert from YUV I400 to 2.14 fixed point RGB
// Asm fragment: multiplies the eight 16-bit Y lanes of v0 by v24 and keeps the
// high 16 bits of each 32-bit product, then forms G/B/R in v17/v16/v18 by
// adding v26/v4/v5 and saturating-subtracting v6/v25/v27. Writes v0, v3, v16,
// v17 and v18. v4, v5 and v6 are only read here, so they must already hold
// their values when the fragment runs (the I400ToARGBRow_NEON hunk in this
// view appears to compute them from v1 before its loop label).
// NOTE(review): v24-v27 are presumably set up by YUVTORGB_SETUP - confirm.
|
||||
#define I400TORGB \
|
||||
"umull2 v3.4s, v0.8h, v24.8h \n" /* upper 4 lanes: v0 * v24, widened to 32 bit */ \
|
||||
"umull v0.4s, v0.4h, v24.4h \n" /* lower 4 lanes: v0 * v24, widened to 32 bit */ \
|
||||
"uzp2 v0.8h, v0.8h, v3.8h \n" /* Y = high 16 bits of each 32-bit product */ \
|
||||
"add v17.8h, v0.8h, v26.8h \n" /* G = Y + v26 */ \
|
||||
"add v16.8h, v0.8h, v4.8h \n" /* B = Y + v4 */ \
|
||||
"add v18.8h, v0.8h, v5.8h \n" /* R = Y + v5 */ \
|
||||
"uqsub v17.8h, v17.8h, v6.8h \n" /* G -= v6, unsigned saturating */ \
|
||||
"uqsub v16.8h, v16.8h, v25.8h \n" /* B -= v25, unsigned saturating */ \
|
||||
"uqsub v18.8h, v18.8h, v27.8h \n" /* R -= v27, unsigned saturating */
|
||||
|
||||
// Convert from 2.14 fixed point RGB To 8 bit RGB
|
||||
#define RGBTORGB8 \
|
||||
"uqshrn v17.8b, v17.8h, #6 \n" \
|
||||
@ -398,8 +409,13 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
|
||||
int width) {
|
||||
asm volatile(
|
||||
YUVTORGB_SETUP
|
||||
"movi v1.16b, #128 \n"
|
||||
"movi v19.8b, #255 \n"
|
||||
"1: \n" READYUV400 YUVTORGB
|
||||
"umull v6.8h, v1.8b, v30.8b \n"
|
||||
"umlal2 v6.8h, v1.16b, v31.16b \n" /* DG */
|
||||
"umull v4.8h, v1.8b, v28.8b \n" /* DB */
|
||||
"umull2 v5.8h, v1.16b, v29.16b \n" /* DR */
|
||||
"1: \n" READYUV400 I400TORGB
|
||||
RGBTORGB8
|
||||
"subs %w[width], %w[width], #8 \n"
|
||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user