mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
[AArch64] Replace UQXTN{,2} with UZP2 in Convert16To8Row_NEON
The existing code makes use of a pair of shifts to put the bits we want in the low part of each vector lane and then a pair of UQXTN and UQXTN2 instructions to perform a saturating cast down from 16-bit elements to 8-bit elements.

We can instead achieve the same thing by adding eight to the first shift amount so that the bits we want appear in the high half of the lane, doing the saturation at the same time, and then simply use UZP2 to pull out the high halves of each lane in a single instruction.

Reduction in runtime for Convert16To8Row_NEON:
  Cortex-A55:  -19.7%
  Cortex-A510: -23.5%
  Cortex-A76:  -35.4%
  Cortex-X2:   -34.1%

Bug: libyuv:976
Change-Id: I9a80c0f4f2c6b5203f23e422c0970d3167052f91
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5463950
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
4f52235a67
commit
356232b687
@ -4701,16 +4701,18 @@ void Convert16To8Row_NEON(const uint16_t* src_y,
|
||||
uint8_t* dst_y,
|
||||
int scale,
|
||||
int width) {
|
||||
int shift = 15 - __builtin_clz((int32_t)scale); // Negative shl is shr
|
||||
// 15 - clz(scale), + 8 to shift result into the high half of the lane to
|
||||
// saturate, then we can just use UZP2 to narrow rather than a pair of
|
||||
// saturating narrow instructions.
|
||||
int shift = 23 - __builtin_clz((int32_t)scale);
|
||||
asm volatile(
|
||||
"dup v2.8h, %w3 \n"
|
||||
"1: \n"
|
||||
"ldp q0, q1, [%0], #32 \n"
|
||||
"ushl v0.8h, v0.8h, v2.8h \n" // shr = v2 is negative
|
||||
"ushl v1.8h, v1.8h, v2.8h \n"
|
||||
"uqshl v0.8h, v0.8h, v2.8h \n"
|
||||
"uqshl v1.8h, v1.8h, v2.8h \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"uqxtn v0.8b, v0.8h \n"
|
||||
"uqxtn2 v0.16b, v1.8h \n"
|
||||
"uzp2 v0.16b, v0.16b, v1.16b \n"
|
||||
"subs %w2, %w2, #16 \n" // 16 src pixels per loop
|
||||
"str q0, [%1], #16 \n" // store 16 pixels
|
||||
"b.gt 1b \n"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user