[AArch64] Add "limit" variable explanations in SVE *AR30 kernels

As requested here: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6023583/1/source/row_sve.cc#1973

Change-Id: I15d8ca1f724a7123fbf52ac60b18c850e4004e64
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6067153
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
George Steed 2024-11-29 15:25:48 +00:00 committed by Frank Barchard
parent 11ef227b6d
commit 3e75e41e79

View File

@@ -301,7 +301,10 @@ extern "C" {
"z20", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", \
"z31", "p0", "p1", "p2", "p3"
-// Store AR30 elements
+// Store AR30 elements. Inputs are 2.14 fixed point RGB. We expect z23 to be
+// populated with 0x3ff0 (0x3fff would also work) to saturate the R input
+// rather than needing a pair of shifts to saturate and then insert into the
+// correct position in the lane.
#define STOREAR30_SVE \
"uqshl z16.h, p0/m, z16.h, #2 \n" /* bbbbbbbbbbxxxxxx */ \
"uqshl z17.h, p0/m, z17.h, #2 \n" /* ggggggggggxxxxxx */ \
@@ -2196,6 +2199,7 @@ void I210ToAR30Row_SVE2(const uint16_t* src_y,
uint64_t vl;
asm("cnth %0" : "=r"(vl));
int width_last_y = width & (vl - 1);
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
uint16_t limit = 0x3ff0;
asm volatile(
"ptrue p0.b \n" //
@@ -2301,6 +2305,7 @@ void P210ToAR30Row_SVE2(const uint16_t* src_y,
int width_last_uv = width_last_y + (width_last_y & 1);
uint32_t nv_uv_start = 0x03010301U;
uint32_t nv_uv_step = 0x04040404U;
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
uint16_t limit = 0x3ff0;
asm volatile(
"ptrue p0.b \n" //
@@ -2458,6 +2463,7 @@ void I410ToAR30Row_SVE2(const uint16_t* src_y,
uint64_t vl;
asm("cnth %0" : "=r"(vl));
int width_last_y = width & (vl - 1);
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
uint16_t limit = 0x3ff0;
asm volatile(
"ptrue p0.b \n" //
@@ -2555,6 +2561,7 @@ void P410ToAR30Row_SVE2(const uint16_t* src_y,
uint64_t vl;
asm("cnth %0" : "=r"(vl));
int width_last_y = width & (vl - 1);
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
uint16_t limit = 0x3ff0;
asm volatile(
"ptrue p0.b \n" //
@@ -2607,6 +2614,7 @@ void I212ToAR30Row_SVE2(const uint16_t* src_y,
uint64_t vl;
asm("cnth %0" : "=r"(vl));
int width_last_y = width & (vl - 1);
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
uint16_t limit = 0x3ff0;
asm volatile(
"ptrue p0.b \n" //