mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Scale down by 2: use ld2 and urhadd
urhadd is a rounded average. The linear filter wants to average horizontally, so use ld2 to separate even and odd pixels.

TBR=jkellander@chromium.org
BUG=None
TEST=LibYUVScaleTest.*ScaleDownBy2*
Change-Id: Id667288a030e72ce8e1c1d6719b69c555c0db063
Reviewed-on: https://chromium-review.googlesource.com/642448
Commit-Queue: Frank Barchard <fbarchard@google.com>
Reviewed-by: Cheng Wang <wangcheng@google.com>
This commit is contained in:
parent
b6e8e9aa97
commit
e200738d82
@ -49,14 +49,10 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
|
||||
(void)src_stride;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post
|
||||
// inc
|
||||
// load even pixels into v0, odd into v1
|
||||
"ld2 {v0.16b,v1.16b}, [%0], #32 \n"
|
||||
"urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add
|
||||
"subs %w2, %w2, #16 \n" // 16 processed per loop
|
||||
"uaddlp v0.8h, v0.16b \n" // add adjacent
|
||||
"uaddlp v1.8h, v1.16b \n"
|
||||
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and
|
||||
// pack
|
||||
"rshrn2 v0.16b, v1.8h, #1 \n"
|
||||
"st1 {v0.16b}, [%1], #16 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -726,13 +722,12 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
|
||||
int dst_width) {
|
||||
(void)src_stride;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
// load even pixels into q0, odd into q1
|
||||
"ld2 {v0.4s, v1.4s}, [%0], #32 \n"
|
||||
"ld2 {v2.4s, v3.4s}, [%0], #32 \n"
|
||||
"1: \n"
|
||||
// load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3
|
||||
"ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop
|
||||
"st1 {v1.16b}, [%1], #16 \n" // store odd pixels
|
||||
"st1 {v3.16b}, [%1], #16 \n"
|
||||
"mov v2.16b, v3.16b \n"
|
||||
"st2 {v1.4s,v2.4s}, [%1], #32 \n" // store 8 odd pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst), // %1
|
||||
@ -748,20 +743,14 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
|
||||
int dst_width) {
|
||||
(void)src_stride;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
// load 8 ARGB pixels.
|
||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
|
||||
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
|
||||
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and
|
||||
// pack
|
||||
"rshrn v1.8b, v1.8h, #1 \n"
|
||||
"rshrn v2.8b, v2.8h, #1 \n"
|
||||
"rshrn v3.8b, v3.8h, #1 \n"
|
||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"
|
||||
"1: \n"
|
||||
// load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3
|
||||
"ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop
|
||||
|
||||
"urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add
|
||||
"urhadd v1.16b, v2.16b, v3.16b \n"
|
||||
"st2 {v0.4s,v1.4s}, [%1], #32 \n" // store 8 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user