mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-10 18:56:49 +08:00
Row AArch64 Neon implementation - Part 3
BUG=319
TESTED=libyuv_unittest
R=fbarchard@google.com
Change-Id: Ia818ca62d4a84d76b0144f904983d82d41cab651
Signed-off-by: Ashok Bhat <ashok.bhat@arm.com>
Review URL: https://webrtc-codereview.appspot.com/15149004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1056 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent: 8f04ca5b9c
commit: c1155cb587
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1055
|
||||
Version: 1056
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -271,14 +271,14 @@ extern "C" {
|
||||
// #define HAS_NV21TORGB565ROW_NEON
|
||||
// #define HAS_YUY2TOARGBROW_NEON
|
||||
// #define HAS_UYVYTOARGBROW_NEON
|
||||
// #define HAS_SPLITUVROW_NEON
|
||||
// #define HAS_MERGEUVROW_NEON
|
||||
// #define HAS_COPYROW_NEON
|
||||
// #define HAS_SETROW_NEON
|
||||
// #define HAS_ARGBSETROWS_NEON
|
||||
// #define HAS_MIRRORROW_NEON
|
||||
// #define HAS_MIRRORUVROW_NEON
|
||||
// #define HAS_ARGBMIRRORROW_NEON
|
||||
#define HAS_SPLITUVROW_NEON
|
||||
#define HAS_MERGEUVROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_SETROW_NEON
|
||||
#define HAS_ARGBSETROWS_NEON
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_MIRRORUVROW_NEON
|
||||
#define HAS_ARGBMIRRORROW_NEON
|
||||
#define HAS_RGB24TOARGBROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
// #define HAS_RGB565TOARGBROW_NEON
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1055
|
||||
#define LIBYUV_VERSION 1056
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -824,19 +824,19 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
|
||||
"ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pairs of UV
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q0}, [%1]! \n" // store U
|
||||
"st1 {v0.16b}, [%1], #16 \n" // store U
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q1}, [%2]! \n" // store V
|
||||
"st1 {v1.16b}, [%2], #16 \n" // store V
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(width) // %3 // Output registers
|
||||
: // Input registers
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_SPLITUVROW_NEON
|
||||
@ -849,12 +849,12 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load U
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load U
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n" // load V
|
||||
"ld1 {v1.16b}, [%1], #16 \n" // load V
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(2)
|
||||
"vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
|
||||
"st2 {v0.16b, v1.16b}, [%2], #32 \n" // store 16 pairs of UV
|
||||
"bgt 1b \n"
|
||||
:
|
||||
"+r"(src_u), // %0
|
||||
@ -862,7 +862,7 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
"+r"(dst_uv), // %2
|
||||
"+r"(width) // %3 // Output registers
|
||||
: // Input registers
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_MERGEUVROW_NEON
|
||||
@ -874,16 +874,16 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
|
||||
"ld1 {v0.8b-v3.8b}, [%0], #32 \n" // load 32
|
||||
"subs %2, %2, #32 \n" // 32 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
|
||||
"st1 {v0.8b-v3.8b}, [%1], #32 \n" // store 32
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(count) // %2 // Output registers
|
||||
: // Input registers
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_COPYROW_NEON
|
||||
@ -892,16 +892,16 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
|
||||
#ifdef HAS_SETROW_NEON
|
||||
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
|
||||
asm volatile (
|
||||
"vdup.u32 q0, %2 \n" // duplicate 4 ints
|
||||
"dup v0.4s, %w2 \n" // duplicate 4 ints
|
||||
"1: \n"
|
||||
"subs %1, %1, #16 \n" // 16 bytes per loop
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n" // store
|
||||
"st1 {v0.16b}, [%0], #16 \n" // store
|
||||
"bgt 1b \n"
|
||||
: "+r"(dst), // %0
|
||||
"+r"(count) // %1
|
||||
: "r"(v32) // %2
|
||||
: "cc", "memory", "q0"
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SETROW_NEON
|
||||
@ -922,26 +922,25 @@ void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
|
||||
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
"mov r3, #-16 \n"
|
||||
"add %0, %0, %2 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
|
||||
"subs %2, #16 \n" // 16 pixels per loop.
|
||||
"vrev64.8 q0, q0 \n"
|
||||
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
|
||||
"subs %2, %2, #16 \n" // 16 pixels per loop.
|
||||
"rev64 v0.16b, v0.16b \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d1}, [%1]! \n" // dst += 16
|
||||
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n"
|
||||
"st1 {v0.D}[0], [%1], #8 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "r3", "q0"
|
||||
: "r"((ptrdiff_t)-16) // %3
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#endif // HAS_MIRRORROW_NEON
|
||||
@ -951,27 +950,27 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
"mov r12, #-16 \n"
|
||||
"add %0, %0, %3, lsl #1 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
|
||||
"subs %3, #8 \n" // 8 pixels per loop.
|
||||
"vrev64.8 q0, q0 \n"
|
||||
"ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
|
||||
"subs %3, %3, #8 \n" // 8 pixels per loop.
|
||||
"rev64 v0.8b, v0.8b \n"
|
||||
"rev64 v1.8b, v1.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n" // dst += 8
|
||||
"st1 {v0.8b}, [%1], #8 \n" // dst += 8
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d1}, [%2]! \n"
|
||||
"st1 {v1.8b}, [%2], #8 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "r12", "q0"
|
||||
: "r"((ptrdiff_t)-16) // %4
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
}
|
||||
#endif // HAS_MIRRORUVROW_NEON
|
||||
@ -980,26 +979,25 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
"mov r3, #-16 \n"
|
||||
"add %0, %0, %2, lsl #2 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
|
||||
"subs %2, #4 \n" // 4 pixels per loop.
|
||||
"vrev64.32 q0, q0 \n"
|
||||
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
|
||||
"subs %2, %2, #4 \n" // 4 pixels per loop.
|
||||
"rev64 v0.4s, v0.4s \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d1}, [%1]! \n" // dst += 16
|
||||
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n"
|
||||
"st1 {v0.D}[0], [%1], #8 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "r3", "q0"
|
||||
: "r"((ptrdiff_t)-16) // %3
|
||||
: "cc", "memory", "v0"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBMIRRORROW_NEON
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user