mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Clean up aarch64-related macros and fix bugs
Fix 2 bugs: (1) a build bug in libyuv.gyp; (2) a runtime bug in ScaleRowDown38_2_Box_NEON. BUG= TESTED=libyuv_unittest R=fbarchard@google.com, fbarchard@chromium.org Review URL: https://webrtc-codereview.appspot.com/23939004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1117 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
205c1440cf
commit
0eb196f8db
@ -44,21 +44,13 @@ extern "C" {
|
|||||||
|
|
||||||
// The following are available on Neon platforms:
|
// The following are available on Neon platforms:
|
||||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||||
#define HAS_SCALEROWDOWN2_NEON
|
#define HAS_SCALEROWDOWN2_NEON
|
||||||
#define HAS_SCALEROWDOWN4_NEON
|
#define HAS_SCALEROWDOWN4_NEON
|
||||||
#define HAS_SCALEROWDOWN34_NEON
|
#define HAS_SCALEROWDOWN34_NEON
|
||||||
#define HAS_SCALEROWDOWN38_NEON
|
#define HAS_SCALEROWDOWN38_NEON
|
||||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||||
#define HAS_SCALEARGBROWDOWN2_NEON
|
#define HAS_SCALEARGBROWDOWN2_NEON
|
||||||
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
|
||||||
(defined(__aarch64__) || defined(LIBYUV_NEON))
|
|
||||||
#define HAS_SCALEROWDOWN2_NEON
|
|
||||||
#define HAS_SCALEROWDOWN4_NEON
|
|
||||||
#define HAS_SCALEROWDOWN34_NEON
|
|
||||||
#define HAS_SCALEROWDOWN38_NEON
|
|
||||||
#define HAS_SCALEARGBROWDOWN2_NEON
|
|
||||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on Mips platforms:
|
// The following are available on Mips platforms:
|
||||||
|
|||||||
22
libyuv.gyp
22
libyuv.gyp
@ -130,16 +130,6 @@
|
|||||||
'LIBYUV_DISABLE_X86',
|
'LIBYUV_DISABLE_X86',
|
||||||
],
|
],
|
||||||
}],
|
}],
|
||||||
['OS == "android" and target_arch == "arm64"', {
|
|
||||||
'ldflags': [
|
|
||||||
'-Wl,--dynamic-linker,/system/bin/linker64',
|
|
||||||
],
|
|
||||||
}],
|
|
||||||
['OS == "android" and target_arch != "arm64"', {
|
|
||||||
'ldflags': [
|
|
||||||
'-Wl,--dynamic-linker,/system/bin/linker',
|
|
||||||
],
|
|
||||||
}],
|
|
||||||
], #conditions
|
], #conditions
|
||||||
'defines': [
|
'defines': [
|
||||||
# Enable the following 3 macros to turn off assembly for specified CPU.
|
# Enable the following 3 macros to turn off assembly for specified CPU.
|
||||||
@ -159,6 +149,18 @@
|
|||||||
'include',
|
'include',
|
||||||
'.',
|
'.',
|
||||||
],
|
],
|
||||||
|
'conditions': [
|
||||||
|
['OS == "android" and target_arch == "arm64"', {
|
||||||
|
'ldflags': [
|
||||||
|
'-Wl,--dynamic-linker,/system/bin/linker64',
|
||||||
|
],
|
||||||
|
}],
|
||||||
|
['OS == "android" and target_arch != "arm64"', {
|
||||||
|
'ldflags': [
|
||||||
|
'-Wl,--dynamic-linker,/system/bin/linker',
|
||||||
|
],
|
||||||
|
}],
|
||||||
|
], #conditions
|
||||||
},
|
},
|
||||||
'sources': [
|
'sources': [
|
||||||
'<@(libyuv_sources)',
|
'<@(libyuv_sources)',
|
||||||
|
|||||||
@ -42,11 +42,7 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||||
#define HAS_MIRRORROW_NEON
|
|
||||||
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
|
||||||
#define HAS_MIRRORROW_UV_NEON
|
|
||||||
void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
|
|
||||||
#define HAS_TRANSPOSE_WX8_NEON
|
#define HAS_TRANSPOSE_WX8_NEON
|
||||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||||
uint8* dst, int dst_stride, int width);
|
uint8* dst, int dst_stride, int width);
|
||||||
@ -55,23 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
|||||||
uint8* dst_a, int dst_stride_a,
|
uint8* dst_a, int dst_stride_a,
|
||||||
uint8* dst_b, int dst_stride_b,
|
uint8* dst_b, int dst_stride_b,
|
||||||
int width);
|
int width);
|
||||||
//following symbol is temporally enable for aarch64, until all neon optimized
|
#endif
|
||||||
//functions have been ported to aarch64
|
|
||||||
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
|
||||||
(defined(__aarch64__) || defined(LIBYUV_NEON))
|
|
||||||
// #define HAS_MIRRORROW_NEON
|
|
||||||
// void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
|
||||||
// #define HAS_MIRRORROW_UV_NEON
|
|
||||||
// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
|
|
||||||
#define HAS_TRANSPOSE_WX8_NEON
|
|
||||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
|
||||||
uint8* dst, int dst_stride, int width);
|
|
||||||
#define HAS_TRANSPOSE_UVWX8_NEON
|
|
||||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
|
||||||
uint8* dst_a, int dst_stride_a,
|
|
||||||
uint8* dst_b, int dst_stride_b,
|
|
||||||
int width);
|
|
||||||
#endif // defined(__ARM_NEON__)
|
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
|
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
|
||||||
defined(__mips__) && \
|
defined(__mips__) && \
|
||||||
|
|||||||
@ -20,7 +20,6 @@ extern "C" {
|
|||||||
// This module is for GCC Neon armv8 64 bit.
|
// This module is for GCC Neon armv8 64 bit.
|
||||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN2_NEON
|
|
||||||
// Read 32x1 throw away even pixels, and write 16x1.
|
// Read 32x1 throw away even pixels, and write 16x1.
|
||||||
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width) {
|
uint8* dst, int dst_width) {
|
||||||
@ -40,9 +39,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
: "v0", "v1" // Clobber List
|
: "v0", "v1" // Clobber List
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN2_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN2_NEON
|
|
||||||
// Read 32x2 average down and write 16x1.
|
// Read 32x2 average down and write 16x1.
|
||||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width) {
|
uint8* dst, int dst_width) {
|
||||||
@ -72,9 +69,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
: "v0", "v1", "v2", "v3" // Clobber List
|
: "v0", "v1", "v2", "v3" // Clobber List
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN2_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN4_NEON
|
|
||||||
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
@ -92,9 +87,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
: "v0", "v1", "v2", "v3", "memory", "cc"
|
: "v0", "v1", "v2", "v3", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN4_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN4_NEON
|
|
||||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
const uint8* src_ptr1 = src_ptr + src_stride;
|
const uint8* src_ptr1 = src_ptr + src_stride;
|
||||||
@ -130,9 +123,7 @@ asm volatile (
|
|||||||
: "v0", "v1", "v2", "v3", "memory", "cc"
|
: "v0", "v1", "v2", "v3", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN4_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN34_NEON
|
|
||||||
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
|
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
|
||||||
// to load up the every 4th pixel into a 4 different registers.
|
// to load up the every 4th pixel into a 4 different registers.
|
||||||
// Point samples 32 pixels to 24 pixels.
|
// Point samples 32 pixels to 24 pixels.
|
||||||
@ -155,9 +146,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
|
|||||||
: "v0", "v1", "v2", "v3", "memory", "cc"
|
: "v0", "v1", "v2", "v3", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN34_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN34_NEON
|
|
||||||
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
@ -217,9 +206,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
|||||||
"v20", "memory", "cc"
|
"v20", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //ScaleRowDown34_0_Box_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN34_NEON
|
|
||||||
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
@ -262,9 +249,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
|||||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
|
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN34_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN38_NEON
|
|
||||||
static uvec8 kShuf38 =
|
static uvec8 kShuf38 =
|
||||||
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
|
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
|
||||||
static uvec8 kShuf38_2 =
|
static uvec8 kShuf38_2 =
|
||||||
@ -301,9 +286,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //HAS_SCALEROWDOWN38_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN38_NEON
|
|
||||||
// 32x3 -> 12x1
|
// 32x3 -> 12x1
|
||||||
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
@ -432,9 +414,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
|||||||
"v30", "v31", "memory", "cc"
|
"v30", "v31", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN38_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN38_NEON
|
|
||||||
// 32x2 -> 12x1
|
// 32x2 -> 12x1
|
||||||
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
@ -456,7 +436,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
|||||||
MEMACCESS(0)
|
MEMACCESS(0)
|
||||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
|
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
|
||||||
MEMACCESS(3)
|
MEMACCESS(3)
|
||||||
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n"
|
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
|
||||||
"subs %3, %3, #12 \n"
|
"subs %3, %3, #12 \n"
|
||||||
|
|
||||||
// Shuffle the input data around to get align the data
|
// Shuffle the input data around to get align the data
|
||||||
@ -541,7 +521,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
|||||||
"v18", "v19", "v30", "v31", "memory", "cc"
|
"v18", "v19", "v30", "v31", "memory", "cc"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEROWDOWN38_NEON
|
|
||||||
|
|
||||||
// 16x2 -> 16x1
|
// 16x2 -> 16x1
|
||||||
void ScaleFilterRows_NEON(uint8* dst_ptr,
|
void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||||
@ -643,7 +622,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_SCALEARGBROWDOWN2_NEON
|
|
||||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width) {
|
uint8* dst, int dst_width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
@ -666,9 +644,7 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
|
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEARGBROWDOWN2_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEARGBROWDOWN2_NEON
|
|
||||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width) {
|
uint8* dst, int dst_width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
@ -703,9 +679,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
|
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEARGBROWDOWN2_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
|
|
||||||
// Reads 4 pixels at a time.
|
// Reads 4 pixels at a time.
|
||||||
// Alignment requirement: src_argb 4 byte aligned.
|
// Alignment requirement: src_argb 4 byte aligned.
|
||||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||||
@ -731,9 +705,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
|||||||
: "memory", "cc", "v0"
|
: "memory", "cc", "v0"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
|
|
||||||
|
|
||||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
|
|
||||||
// Reads 4 pixels at a time.
|
// Reads 4 pixels at a time.
|
||||||
// Alignment requirement: src_argb 4 byte aligned.
|
// Alignment requirement: src_argb 4 byte aligned.
|
||||||
// TODO, might be worth another optimization pass in future.
|
// TODO, might be worth another optimization pass in future.
|
||||||
@ -786,7 +758,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
|||||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
|
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
|
|
||||||
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user