mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
[AArch64] Remove unused SVE INDEX instrs from NV{12,21} kernels
When reading subsampled UV data in NV{12,21}, we previously needed to
permute the data to both (a) duplicate each element into the
corresponding pair of lanes for the Y elements; and (b) arrange the UV
components in the correct lanes. This was done in a
vector-length-agnostic way by generating the permute indices dynamically
at runtime through an SVE INDEX instruction.
Now that we are using the READNV_SVE_2X macro everywhere, these
instructions are redundant: the multiplications are done on the
subsampled UV data before the duplication, and the conversion macro takes
arguments that adjust whether we need to operate on the even or odd
lanes of the vector.
Since the permute indices generated by these INDEX instructions are now
unused, remove them.
Change-Id: I3298a83aadfda52c4cc89bc4fd6518b06765a187
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6089957
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
533dc5866b
commit
cce8950816
@ -791,10 +791,6 @@ static inline void NV12ToARGBRow_SVE_SC(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) STREAMING_COMPATIBLE {
|
||||
uint32_t nv_u_start = 0xff00U;
|
||||
uint32_t nv_u_step = 0x0002U;
|
||||
uint32_t nv_v_start = 0xff01U;
|
||||
uint32_t nv_v_step = 0x0002U;
|
||||
uint64_t vl;
|
||||
asm("cntb %0" : "=r"(vl));
|
||||
int width_last_y = width & (vl - 1);
|
||||
@ -803,8 +799,6 @@ static inline void NV12ToARGBRow_SVE_SC(const uint8_t* src_y,
|
||||
"ptrue p0.b \n" //
|
||||
YUVTORGB_SVE_SETUP
|
||||
"dup z19.b, #255 \n" // Alpha
|
||||
"index z7.h, %w[nv_u_start], %w[nv_u_step] \n"
|
||||
"index z23.h, %w[nv_v_start], %w[nv_v_step] \n"
|
||||
"subs %w[width], %w[width], %w[vl] \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
@ -837,10 +831,6 @@ static inline void NV12ToARGBRow_SVE_SC(const uint8_t* src_y,
|
||||
: [vl] "r"(vl), // %[vl]
|
||||
[kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
|
||||
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias), // %[kRGBCoeffBias]
|
||||
[nv_u_start] "r"(nv_u_start), // %[nv_u_start]
|
||||
[nv_u_step] "r"(nv_u_step), // %[nv_u_step]
|
||||
[nv_v_start] "r"(nv_v_start), // %[nv_v_start]
|
||||
[nv_v_step] "r"(nv_v_step), // %[nv_v_step]
|
||||
[width_last_y] "r"(width_last_y), // %[width_last_y]
|
||||
[width_last_uv] "r"(width_last_uv) // %[width_last_uv]
|
||||
: "cc", "memory", YUVTORGB_SVE_REGS, "p2");
|
||||
@ -851,10 +841,6 @@ static inline void NV21ToARGBRow_SVE_SC(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) STREAMING_COMPATIBLE {
|
||||
uint32_t nv_u_start = 0xff01U;
|
||||
uint32_t nv_u_step = 0x0002U;
|
||||
uint32_t nv_v_start = 0xff00U;
|
||||
uint32_t nv_v_step = 0x0002U;
|
||||
uint64_t vl;
|
||||
asm("cntb %0" : "=r"(vl));
|
||||
int width_last_y = width & (vl - 1);
|
||||
@ -863,8 +849,6 @@ static inline void NV21ToARGBRow_SVE_SC(const uint8_t* src_y,
|
||||
"ptrue p0.b \n" //
|
||||
YUVTORGB_SVE_SETUP
|
||||
"dup z19.b, #255 \n" // Alpha
|
||||
"index z7.h, %w[nv_u_start], %w[nv_u_step] \n"
|
||||
"index z23.h, %w[nv_v_start], %w[nv_v_step] \n"
|
||||
"subs %w[width], %w[width], %w[vl] \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
@ -897,10 +881,6 @@ static inline void NV21ToARGBRow_SVE_SC(const uint8_t* src_y,
|
||||
: [vl] "r"(vl), // %[vl]
|
||||
[kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
|
||||
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias), // %[kRGBCoeffBias]
|
||||
[nv_u_start] "r"(nv_u_start), // %[nv_u_start]
|
||||
[nv_u_step] "r"(nv_u_step), // %[nv_u_step]
|
||||
[nv_v_start] "r"(nv_v_start), // %[nv_v_start]
|
||||
[nv_v_step] "r"(nv_v_step), // %[nv_v_step]
|
||||
[width_last_y] "r"(width_last_y), // %[width_last_y]
|
||||
[width_last_uv] "r"(width_last_uv) // %[width_last_uv]
|
||||
: "cc", "memory", YUVTORGB_SVE_REGS, "p2");
|
||||
@ -912,10 +892,6 @@ static inline void NV12ToRGB24Row_SVE_SC(
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) STREAMING_COMPATIBLE {
|
||||
uint32_t nv_u_start = 0xff00U;
|
||||
uint32_t nv_u_step = 0x0002U;
|
||||
uint32_t nv_v_start = 0xff01U;
|
||||
uint32_t nv_v_step = 0x0002U;
|
||||
uint64_t vl;
|
||||
asm("cntb %0" : "=r"(vl));
|
||||
int width_last_y = width & (vl - 1);
|
||||
@ -924,8 +900,6 @@ static inline void NV12ToRGB24Row_SVE_SC(
|
||||
"ptrue p0.b \n" //
|
||||
YUVTORGB_SVE_SETUP
|
||||
"dup z19.b, #255 \n" // Alpha
|
||||
"index z7.h, %w[nv_u_start], %w[nv_u_step] \n"
|
||||
"index z23.h, %w[nv_v_start], %w[nv_v_step] \n"
|
||||
"subs %w[width], %w[width], %w[vl] \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
@ -958,10 +932,6 @@ static inline void NV12ToRGB24Row_SVE_SC(
|
||||
: [vl] "r"(vl), // %[vl]
|
||||
[kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
|
||||
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias), // %[kRGBCoeffBias]
|
||||
[nv_u_start] "r"(nv_u_start), // %[nv_u_start]
|
||||
[nv_u_step] "r"(nv_u_step), // %[nv_u_step]
|
||||
[nv_v_start] "r"(nv_v_start), // %[nv_v_start]
|
||||
[nv_v_step] "r"(nv_v_step), // %[nv_v_step]
|
||||
[width_last_y] "r"(width_last_y), // %[width_last_y]
|
||||
[width_last_uv] "r"(width_last_uv) // %[width_last_uv]
|
||||
: "cc", "memory", YUVTORGB_SVE_REGS, "p2");
|
||||
@ -973,10 +943,6 @@ static inline void NV21ToRGB24Row_SVE_SC(
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) STREAMING_COMPATIBLE {
|
||||
uint32_t nv_u_start = 0xff01U;
|
||||
uint32_t nv_u_step = 0x0002U;
|
||||
uint32_t nv_v_start = 0xff00U;
|
||||
uint32_t nv_v_step = 0x0002U;
|
||||
uint64_t vl;
|
||||
asm("cntb %0" : "=r"(vl));
|
||||
int width_last_y = width & (vl - 1);
|
||||
@ -985,8 +951,6 @@ static inline void NV21ToRGB24Row_SVE_SC(
|
||||
"ptrue p0.b \n" //
|
||||
YUVTORGB_SVE_SETUP
|
||||
"dup z19.b, #255 \n" // Alpha
|
||||
"index z7.h, %w[nv_u_start], %w[nv_u_step] \n"
|
||||
"index z23.h, %w[nv_v_start], %w[nv_v_step] \n"
|
||||
"subs %w[width], %w[width], %w[vl] \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
@ -1019,10 +983,6 @@ static inline void NV21ToRGB24Row_SVE_SC(
|
||||
: [vl] "r"(vl), // %[vl]
|
||||
[kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
|
||||
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias), // %[kRGBCoeffBias]
|
||||
[nv_u_start] "r"(nv_u_start), // %[nv_u_start]
|
||||
[nv_u_step] "r"(nv_u_step), // %[nv_u_step]
|
||||
[nv_v_start] "r"(nv_v_start), // %[nv_v_start]
|
||||
[nv_v_step] "r"(nv_v_step), // %[nv_v_step]
|
||||
[width_last_y] "r"(width_last_y), // %[width_last_y]
|
||||
[width_last_uv] "r"(width_last_uv) // %[width_last_uv]
|
||||
: "cc", "memory", YUVTORGB_SVE_REGS, "p2");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user