mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Add ScaleAddRows_NEON for ARM32/64
ARM32/64 NEON versions of ScaleAddRows_NEON are implemented. BUG=319 TESTED=libyuvTest.Scale* on ARM32/64 with Android R=fbarchard@google.com Change-Id: I45b88c2b5f576042ba5b3d8d6f8851257fdb7218 Review URL: https://webrtc-codereview.appspot.com/46379004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1326 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
63726ed9c6
commit
ca5b1bd58b
@ -51,6 +51,7 @@ extern "C" {
|
||||
#define HAS_SCALEROWDOWN38_NEON
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
#define HAS_SCALEARGBROWDOWN2_NEON
|
||||
#define HAS_SCALEADDROWS_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Mips platforms:
|
||||
@ -305,6 +306,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height);
|
||||
|
||||
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
@ -758,6 +758,12 @@ static void ScalePlaneBox(int src_width, int src_height,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_SCALEADDROWS_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(src_width, 16)) {
|
||||
ScaleAddRows = ScaleAddRows_NEON;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
int boxheight;
|
||||
int iy = y >> 16;
|
||||
|
||||
@ -541,6 +541,40 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
const uint8* src_tmp = NULL;
|
||||
asm volatile (
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
"mov r12, %5 \n"
|
||||
"veor q2, q2, q2 \n"
|
||||
"veor q3, q3, q3 \n"
|
||||
"2: \n"
|
||||
// load 16 pixels into q0
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0], %3 \n"
|
||||
"vaddw.u8 q3, q3, d1 \n"
|
||||
"vaddw.u8 q2, q2, d0 \n"
|
||||
"subs r12, r12, #1 \n"
|
||||
"bgt 2b \n"
|
||||
MEMACCESS(2)
|
||||
"vst1.16 {q2, q3}, [%2]! \n" // store pixels
|
||||
"add %1, %1, #16 \n"
|
||||
"subs %4, %4, #16 \n" // 16 processed per loop
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_tmp), // %0
|
||||
"+r"(src_ptr), // %1
|
||||
"+r"(dst_ptr), // %2
|
||||
"+r"(src_stride), // %3
|
||||
"+r"(src_width), // %4
|
||||
"+r"(src_height) // %5
|
||||
:
|
||||
: "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
// 16x2 -> 16x1
|
||||
void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
@ -545,6 +545,39 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
const uint8* src_tmp = NULL;
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
"mov x12, %5 \n"
|
||||
"eor v2.16b, v2.16b, v2.16b \n"
|
||||
"eor v3.16b, v3.16b, v3.16b \n"
|
||||
"2: \n"
|
||||
// load 16 pixels into q0
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.16b}, [%0], %3 \n"
|
||||
"uaddw2 v3.8h, v3.8h, v0.16b \n"
|
||||
"uaddw v2.8h, v2.8h, v0.8b \n"
|
||||
"subs x12, x12, #1 \n"
|
||||
"b.gt 2b \n"
|
||||
MEMACCESS(2)
|
||||
"st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
|
||||
"add %1, %1, #16 \n"
|
||||
"subs %4, %4, #16 \n" // 16 processed per loop
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_tmp), // %0
|
||||
"+r"(src_ptr), // %1
|
||||
"+r"(dst_ptr), // %2
|
||||
"+r"(src_stride), // %3
|
||||
"+r"(src_width), // %4
|
||||
"+r"(src_height) // %5
|
||||
:
|
||||
: "memory", "cc", "x12", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
// 16x2 -> 16x1
|
||||
void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user