mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Add ScaleARGBRowDown2Linear_NEON for ARM32/64
ARM32/64 NEON versions of ScaleARGBRowDown2Linear_NEON are implemented. BUG=319 TESTED=libyuvTest.ARGBScale* on ARM32/64 with Android R=fbarchard@google.com Change-Id: Ife602c81b51aa36e0d56b9d628f278a24eed96f6 Review URL: https://webrtc-codereview.appspot.com/44689004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1336 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
e246e6c18f
commit
4d387fc619
@ -259,6 +259,8 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
// NEON row functions that halve an ARGB row horizontally. All three share one
// signature so they can be assigned to the same ScaleARGBRowDown2 function
// pointer; ScaleARGBDown2 picks one of them based on the filtering mode.

// Chosen by ScaleARGBDown2 when filtering == kFilterNone.
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
// Chosen by ScaleARGBDown2 when filtering == kFilterLinear. Each output pixel
// is the rounded average of two horizontally adjacent source pixels; the
// implementation does not read src_stride.
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
// Chosen by ScaleARGBDown2 for the remaining filtering modes (neither
// kFilterNone nor kFilterLinear).
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
|
||||
|
||||
@ -61,8 +61,9 @@ static void ScaleARGBDown2(int src_width, int src_height,
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
|
||||
ScaleARGBRowDown2_NEON;
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
|
||||
ScaleARGBRowDown2Box_NEON);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -698,6 +698,35 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// ARM32 NEON: halves one row of ARGB pixels horizontally with linear filtering.
//
// Each loop iteration reads 16 source pixels (64 bytes) from src_argb and
// writes 8 output pixels (32 bytes) to dst_argb. Every output channel byte is
// the rounded average, (a + b + 1) >> 1, of the same channel in two
// horizontally adjacent source pixels.
//
//   src_argb    source row; advanced by 64 bytes per iteration.
//   src_stride  not referenced by this function (only one row is read).
//   dst_argb    destination row; advanced by 32 bytes per iteration.
//   dst_width   number of output pixels; decremented by 8 per iteration.
//
// The loop body always runs at least once and repeats while the remaining
// dst_width is > 0, so the row is processed in whole groups of 8 output
// pixels. The dispatch in ScaleARGBDown2 selects the NEON row functions only
// when dst_width is a multiple of 8.
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
asm volatile (
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
// De-interleaving loads: the first load fills the low halves of q0..q3
// (d0, d2, d4, d6) and the second fills the high halves (d1, d3, d5, d7), so
// each q register ends up holding one channel of 16 consecutive pixels.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
|
||||
"subs %2, %2, #8 \n" // 8 output pixels processed per loop
|
||||
// Pairwise widening adds: sum each pair of horizontally adjacent bytes.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
|
||||
// Rounding shift right by 1 and narrow to bytes. Results are packed into
// d0..d3; each destination is written only after the q register that
// overlaps it has already been consumed, so the order below matters.
"vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
|
||||
"vrshrn.u16 d1, q1, #1 \n"
|
||||
"vrshrn.u16 d2, q2, #1 \n"
|
||||
"vrshrn.u16 d3, q3, #1 \n"
|
||||
MEMACCESS(1)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // re-interleave and store 8 ARGB pixels.
|
||||
"bgt 1b \n" // loop while remaining dst_width > 0 (flags from subs).
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
asm volatile (
|
||||
|
||||
@ -701,6 +701,33 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS (0)
|
||||
// load 8 ARGB pixels.
|
||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
|
||||
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||
"uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
|
||||
"rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
|
||||
"rshrn v1.8b, v1.8h, #1 \n"
|
||||
"rshrn v2.8b, v2.8h, #1 \n"
|
||||
"rshrn v3.8b, v3.8h, #1 \n"
|
||||
MEMACCESS (1)
|
||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
asm volatile (
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user