mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
[AArch64] Add "limit" variable explanations in SVE *AR30 kernels
As requested here:
https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6023583/1/source/row_sve.cc#1973

Change-Id: I15d8ca1f724a7123fbf52ac60b18c850e4004e64
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6067153
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
11ef227b6d
commit
3e75e41e79
@ -301,7 +301,10 @@ extern "C" {
|
|||||||
"z20", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", \
|
"z20", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", \
|
||||||
"z31", "p0", "p1", "p2", "p3"
|
"z31", "p0", "p1", "p2", "p3"
|
||||||
|
|
||||||
// Store AR30 elements
|
// Store AR30 elements. Inputs are 2.14 fixed point RGB. We expect z23 to be
|
||||||
|
// populated with 0x3ff0 (0x3fff would also work) to saturate the R input
|
||||||
|
// rather than needing a pair of shifts to saturate and then insert into the
|
||||||
|
// correct position in the lane.
|
||||||
#define STOREAR30_SVE \
|
#define STOREAR30_SVE \
|
||||||
"uqshl z16.h, p0/m, z16.h, #2 \n" /* bbbbbbbbbbxxxxxx */ \
|
"uqshl z16.h, p0/m, z16.h, #2 \n" /* bbbbbbbbbbxxxxxx */ \
|
||||||
"uqshl z17.h, p0/m, z17.h, #2 \n" /* ggggggggggxxxxxx */ \
|
"uqshl z17.h, p0/m, z17.h, #2 \n" /* ggggggggggxxxxxx */ \
|
||||||
@ -2196,6 +2199,7 @@ void I210ToAR30Row_SVE2(const uint16_t* src_y,
|
|||||||
uint64_t vl;
|
uint64_t vl;
|
||||||
asm("cnth %0" : "=r"(vl));
|
asm("cnth %0" : "=r"(vl));
|
||||||
int width_last_y = width & (vl - 1);
|
int width_last_y = width & (vl - 1);
|
||||||
|
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ptrue p0.b \n" //
|
"ptrue p0.b \n" //
|
||||||
@ -2301,6 +2305,7 @@ void P210ToAR30Row_SVE2(const uint16_t* src_y,
|
|||||||
int width_last_uv = width_last_y + (width_last_y & 1);
|
int width_last_uv = width_last_y + (width_last_y & 1);
|
||||||
uint32_t nv_uv_start = 0x03010301U;
|
uint32_t nv_uv_start = 0x03010301U;
|
||||||
uint32_t nv_uv_step = 0x04040404U;
|
uint32_t nv_uv_step = 0x04040404U;
|
||||||
|
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ptrue p0.b \n" //
|
"ptrue p0.b \n" //
|
||||||
@ -2458,6 +2463,7 @@ void I410ToAR30Row_SVE2(const uint16_t* src_y,
|
|||||||
uint64_t vl;
|
uint64_t vl;
|
||||||
asm("cnth %0" : "=r"(vl));
|
asm("cnth %0" : "=r"(vl));
|
||||||
int width_last_y = width & (vl - 1);
|
int width_last_y = width & (vl - 1);
|
||||||
|
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ptrue p0.b \n" //
|
"ptrue p0.b \n" //
|
||||||
@ -2555,6 +2561,7 @@ void P410ToAR30Row_SVE2(const uint16_t* src_y,
|
|||||||
uint64_t vl;
|
uint64_t vl;
|
||||||
asm("cnth %0" : "=r"(vl));
|
asm("cnth %0" : "=r"(vl));
|
||||||
int width_last_y = width & (vl - 1);
|
int width_last_y = width & (vl - 1);
|
||||||
|
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ptrue p0.b \n" //
|
"ptrue p0.b \n" //
|
||||||
@ -2607,6 +2614,7 @@ void I212ToAR30Row_SVE2(const uint16_t* src_y,
|
|||||||
uint64_t vl;
|
uint64_t vl;
|
||||||
asm("cnth %0" : "=r"(vl));
|
asm("cnth %0" : "=r"(vl));
|
||||||
int width_last_y = width & (vl - 1);
|
int width_last_y = width & (vl - 1);
|
||||||
|
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ptrue p0.b \n" //
|
"ptrue p0.b \n" //
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user