mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
[AArch64] Remove redundant increments in ScaleRowDown2_16_NEON
The increments were mistakenly copied from the main loop body; however, this particular block of the code is executed at most once, so we do not need to perform the address updates. Also adjust formatting with clang-format to match other kernels. Change-Id: I8214821417d5e4f455ebe8805e1a37a9728ab8d2 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6067154 Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
9144583f22
commit
233f859e3c
@@ -1358,45 +1358,43 @@ void ScaleRowDown2_16_NEON(const uint16_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16_t* dst,
|
||||
int dst_width) {
|
||||
|
||||
(void)src_stride;
|
||||
asm volatile(
|
||||
"subs %w[dst_width], %w[dst_width], #32 \n"
|
||||
"b.lt 2f \n"
|
||||
"subs %w[dst_width], %w[dst_width], #32 \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
"1: \n"
|
||||
"ldp q0, q1, [%[src_ptr]] \n"
|
||||
"ldp q2, q3, [%[src_ptr], #32] \n"
|
||||
"ldp q4, q5, [%[src_ptr], #64] \n"
|
||||
"ldp q6, q7, [%[src_ptr], #96] \n"
|
||||
"add %[src_ptr], %[src_ptr], #128 \n"
|
||||
"uzp2 v0.8h, v0.8h, v1.8h \n"
|
||||
"uzp2 v1.8h, v2.8h, v3.8h \n"
|
||||
"uzp2 v2.8h, v4.8h, v5.8h \n"
|
||||
"uzp2 v3.8h, v6.8h, v7.8h \n"
|
||||
"subs %w[dst_width], %w[dst_width], #32 \n" // 32 elems per iteration.
|
||||
"stp q0, q1, [%[dst_ptr]] \n"
|
||||
"stp q2, q3, [%[dst_ptr], #32] \n"
|
||||
"add %[dst_ptr], %[dst_ptr], #64 \n"
|
||||
"b.ge 1b \n"
|
||||
"1: \n"
|
||||
"ldp q0, q1, [%[src_ptr]] \n"
|
||||
"ldp q2, q3, [%[src_ptr], #32] \n"
|
||||
"ldp q4, q5, [%[src_ptr], #64] \n"
|
||||
"ldp q6, q7, [%[src_ptr], #96] \n"
|
||||
"add %[src_ptr], %[src_ptr], #128 \n"
|
||||
"uzp2 v0.8h, v0.8h, v1.8h \n"
|
||||
"uzp2 v1.8h, v2.8h, v3.8h \n"
|
||||
"uzp2 v2.8h, v4.8h, v5.8h \n"
|
||||
"uzp2 v3.8h, v6.8h, v7.8h \n"
|
||||
"subs %w[dst_width], %w[dst_width], #32 \n" // 32 elems per iteration.
|
||||
"stp q0, q1, [%[dst_ptr]] \n"
|
||||
"stp q2, q3, [%[dst_ptr], #32] \n"
|
||||
"add %[dst_ptr], %[dst_ptr], #64 \n"
|
||||
"b.ge 1b \n"
|
||||
|
||||
"2: \n"
|
||||
"adds %w[dst_width], %w[dst_width], #32 \n"
|
||||
"b.eq 99f \n"
|
||||
"2: \n"
|
||||
"adds %w[dst_width], %w[dst_width], #32 \n"
|
||||
"b.eq 99f \n"
|
||||
|
||||
"ldp q0, q1, [%[src_ptr]] \n"
|
||||
"ldp q2, q3, [%[src_ptr], #32] \n"
|
||||
"add %[src_ptr], %[src_ptr], #64 \n"
|
||||
"uzp2 v0.8h, v0.8h, v1.8h \n"
|
||||
"uzp2 v1.8h, v2.8h, v3.8h \n"
|
||||
"stp q0, q1, [%[dst_ptr]], #32 \n"
|
||||
"ldp q0, q1, [%[src_ptr]] \n"
|
||||
"ldp q2, q3, [%[src_ptr], #32] \n"
|
||||
"uzp2 v0.8h, v0.8h, v1.8h \n"
|
||||
"uzp2 v1.8h, v2.8h, v3.8h \n"
|
||||
"stp q0, q1, [%[dst_ptr]] \n"
|
||||
|
||||
"99: \n"
|
||||
: [src_ptr]"+r"(src_ptr),
|
||||
[dst_ptr]"+r"(dst),
|
||||
[dst_width]"+r"(dst_width)
|
||||
:
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
|
||||
"99: \n"
|
||||
: [src_ptr] "+r"(src_ptr), // %[src_ptr]
|
||||
[dst_ptr] "+r"(dst), // %[dst_ptr]
|
||||
[dst_width] "+r"(dst_width) // %[dst_width]
|
||||
:
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_16_NEON(const uint16_t* src_ptr,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user