mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Add ScaleRowDown2Linear_NEON for ARM32/64
ARM32/64 NEON versions of ScaleRowDown2Linear_NEON are implemented. BUG=319 TESTED=libyuvTest.ScaleDownBy2_Linear on ARM32/64 with Android R=fbarchard@google.com Change-Id: I2c7f43a0d56ed4dfded5bdbbb61765d87d65a2ba Review URL: https://webrtc-codereview.appspot.com/43519005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1307 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
693e0217c8
commit
274c9bce92
@ -268,6 +268,9 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
|
||||
|
||||
@ -55,7 +55,9 @@ static void ScalePlaneDown2(int src_width, int src_height,
|
||||
|
||||
#if defined(HAS_SCALEROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
|
||||
ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
|
||||
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
|
||||
(filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
|
||||
ScaleRowDown2Box_NEON);
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEROWDOWN2_SSE2)
|
||||
|
||||
@ -43,6 +43,30 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// Read 32x1 average down and write 16x1.
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
asm volatile (
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop
|
||||
"vpaddl.u8 q0, q0 \n" // add adjacent
|
||||
"vpaddl.u8 q1, q1 \n"
|
||||
"vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
|
||||
"vrshrn.u16 d1, q1, #1 \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q0}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "q0", "q1" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
// Read 32x2 average down and write 16x1.
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
|
||||
@ -40,6 +40,29 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// Read 32x1 average down and write 16x1.
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post inc
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop
|
||||
"uaddlp v0.8h, v0.16b \n" // add adjacent
|
||||
"uaddlp v1.8h, v1.16b \n"
|
||||
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
|
||||
"rshrn2 v0.16b, v1.8h, #1 \n"
|
||||
MEMACCESS(1)
|
||||
"st1 {v0.16b}, [%1], #16 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
// Read 32x2 average down and write 16x1.
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user