mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Row AArch64 Neon implementation - Part 1
BUG=319
TEST=libyuv_unittest
R=fbarchard@google.com

Change-Id: I367ffa7bb0fd0337ab8486d3eb4fb94afea7400c
Signed-off-by: Ashok Bhat <ashok.bhat@arm.com>

Review URL: https://webrtc-codereview.appspot.com/21149004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1044 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
26f43db1ef
commit
de9fa43c60
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1041
|
||||
Version: 1044
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -252,6 +252,94 @@ extern "C" {
|
||||
|
||||
// The following are available on arm64 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
// #define HAS_I444TOARGBROW_NEON
|
||||
// #define HAS_I422TOARGBROW_NEON
|
||||
// #define HAS_I411TOARGBROW_NEON
|
||||
// #define HAS_I422TOBGRAROW_NEON
|
||||
// #define HAS_I422TOABGRROW_NEON
|
||||
// #define HAS_I422TORGBAROW_NEON
|
||||
// #define HAS_I422TORGB24ROW_NEON
|
||||
// #define HAS_I422TORAWROW_NEON
|
||||
// #define HAS_I422TORGB565ROW_NEON
|
||||
// #define HAS_I422TOARGB1555ROW_NEON
|
||||
// #define HAS_I422TOARGB4444ROW_NEON
|
||||
// #define HAS_YTOARGBROW_NEON
|
||||
// #define HAS_I400TOARGBROW_NEON
|
||||
// #define HAS_NV12TOARGBROW_NEON
|
||||
// #define HAS_NV21TOARGBROW_NEON
|
||||
// #define HAS_NV12TORGB565ROW_NEON
|
||||
// #define HAS_NV21TORGB565ROW_NEON
|
||||
// #define HAS_YUY2TOARGBROW_NEON
|
||||
// #define HAS_UYVYTOARGBROW_NEON
|
||||
// #define HAS_SPLITUVROW_NEON
|
||||
// #define HAS_MERGEUVROW_NEON
|
||||
// #define HAS_COPYROW_NEON
|
||||
// #define HAS_SETROW_NEON
|
||||
// #define HAS_ARGBSETROWS_NEON
|
||||
// #define HAS_MIRRORROW_NEON
|
||||
// #define HAS_MIRRORUVROW_NEON
|
||||
// #define HAS_ARGBMIRRORROW_NEON
|
||||
// #define HAS_RGB24TOARGBROW_NEON
|
||||
// #define HAS_RAWTOARGBROW_NEON
|
||||
// #define HAS_RGB565TOARGBROW_NEON
|
||||
// #define HAS_ARGB1555TOARGBROW_NEON
|
||||
// #define HAS_ARGB4444TOARGBROW_NEON
|
||||
// #define HAS_ARGBTORGB24ROW_NEON
|
||||
// #define HAS_ARGBTORAWROW_NEON
|
||||
// #define HAS_YUY2TOYROW_NEON
|
||||
// #define HAS_UYVYTOYROW_NEON
|
||||
// #define HAS_YUY2TOUV422ROW_NEON
|
||||
// #define HAS_UYVYTOUV422ROW_NEON
|
||||
// #define HAS_YUY2TOUVROW_NEON
|
||||
// #define HAS_UYVYTOUVROW_NEON
|
||||
// #define HAS_HALFROW_NEON
|
||||
// #define HAS_ARGBTOBAYERROW_NEON
|
||||
// #define HAS_ARGBTOBAYERGGROW_NEON
|
||||
// #define HAS_ARGBSHUFFLEROW_NEON
|
||||
// #define HAS_I422TOYUY2ROW_NEON
|
||||
// #define HAS_I422TOUYVYROW_NEON
|
||||
// #define HAS_ARGBTORGB565ROW_NEON
|
||||
// #define HAS_ARGBTOARGB1555ROW_NEON
|
||||
// #define HAS_ARGBTOARGB4444ROW_NEON
|
||||
// #define HAS_ARGBTOYROW_NEON
|
||||
// #define HAS_ARGBTOYJROW_NEON
|
||||
// #define HAS_ARGBTOUV444ROW_NEON
|
||||
// #define HAS_ARGBTOUV422ROW_NEON
|
||||
// #define HAS_ARGBTOUV411ROW_NEON
|
||||
// #define HAS_ARGBTOUVROW_NEON
|
||||
// #define HAS_ARGBTOUVJROW_NEON
|
||||
// #define HAS_BGRATOUVROW_NEON
|
||||
// #define HAS_ABGRTOUVROW_NEON
|
||||
// #define HAS_RGBATOUVROW_NEON
|
||||
// #define HAS_RGB24TOUVROW_NEON
|
||||
// #define HAS_RAWTOUVROW_NEON
|
||||
// #define HAS_RGB565TOUVROW_NEON
|
||||
// #define HAS_ARGB1555TOUVROW_NEON
|
||||
// #define HAS_ARGB4444TOUVROW_NEON
|
||||
// #define HAS_RGB565TOYROW_NEON
|
||||
// #define HAS_ARGB1555TOYROW_NEON
|
||||
// #define HAS_ARGB4444TOYROW_NEON
|
||||
// #define HAS_BGRATOYROW_NEON
|
||||
// #define HAS_ABGRTOYROW_NEON
|
||||
// #define HAS_RGBATOYROW_NEON
|
||||
// #define HAS_RGB24TOYROW_NEON
|
||||
// #define HAS_RAWTOYROW_NEON
|
||||
// #define HAS_INTERPOLATEROW_NEON
|
||||
// #define HAS_ARGBBLENDROW_NEON
|
||||
// #define HAS_ARGBATTENUATEROW_NEON
|
||||
// #define HAS_ARGBQUANTIZEROW_NEON
|
||||
// #define HAS_ARGBSHADEROW_NEON
|
||||
// #define HAS_ARGBGRAYROW_NEON
|
||||
// #define HAS_ARGBSEPIAROW_NEON
|
||||
// #define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
// #define HAS_ARGBMULTIPLYROW_NEON
|
||||
// #define HAS_ARGBADDROW_NEON
|
||||
// #define HAS_ARGBSUBTRACTROW_NEON
|
||||
#define HAS_SOBELROW_NEON
|
||||
#define HAS_SOBELTOPLANEROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELXROW_NEON
|
||||
#define HAS_SOBELYROW_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms:
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1041
|
||||
#define LIBYUV_VERSION 1044
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -3141,27 +3141,27 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // alpha
|
||||
"movi v3.8b, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
|
||||
"ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d1}, [%1]! \n" // load 8 sobely.
|
||||
"ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vqadd.u8 d0, d0, d1 \n" // add
|
||||
"vmov.u8 d1, d0 \n"
|
||||
"vmov.u8 d2, d0 \n"
|
||||
"uqadd v0.8b, v0.8b, v1.8b \n" // add
|
||||
"mov v1.8b, v0.8b \n"
|
||||
"mov v2.8b, v0.8b \n"
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELROW_NEON
|
||||
@ -3175,20 +3175,20 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n" // load 16 sobely.
|
||||
"ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop.
|
||||
"vqadd.u8 q0, q0, q1 \n" // add
|
||||
"uqadd v0.16b, v0.16b, v1.16b \n" // add
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n" // store 16 pixels.
|
||||
"st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_y), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1"
|
||||
: "cc", "memory", "v0", "v1"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELTOPLANEROW_NEON
|
||||
@ -3202,25 +3202,25 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // alpha
|
||||
"movi v3.8b, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
|
||||
"ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0}, [%1]! \n" // load 8 sobely.
|
||||
"ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vqadd.u8 d1, d0, d2 \n" // add
|
||||
"uqadd v1.8b, v0.8b, v2.8b \n" // add
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||
"st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1"
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELXYROW_NEON
|
||||
@ -3236,28 +3236,28 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0],%5 \n" // top
|
||||
"ld1 {v0.8b}, [%0],%5 \n" // top
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d1}, [%0],%6 \n"
|
||||
"vsubl.u8 q0, d0, d1 \n"
|
||||
"ld1 {v1.8b}, [%0],%6 \n"
|
||||
"usubl v0.8h, v0.8b, v1.8b \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d2}, [%1],%5 \n" // center * 2
|
||||
"ld1 {v2.8b}, [%1],%5 \n" // center * 2
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1],%6 \n"
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"ld1 {v3.8b}, [%1],%6 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {d2}, [%2],%5 \n" // bottom
|
||||
"ld1 {v2.8b}, [%2],%5 \n" // bottom
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {d3}, [%2],%6 \n"
|
||||
"ld1 {v3.8b}, [%2],%6 \n"
|
||||
"subs %4, %4, #8 \n" // 8 pixels
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vabs.s16 q0, q0 \n"
|
||||
"vqmovn.u16 d0, q0 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"abs v0.8h, v0.8h \n"
|
||||
"uqxtn v0.8b, v0.8h \n"
|
||||
MEMACCESS(3)
|
||||
"vst1.8 {d0}, [%3]! \n" // store 8 sobelx
|
||||
"st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y0), // %0
|
||||
"+r"(src_y1), // %1
|
||||
@ -3266,7 +3266,7 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
"+r"(width) // %4
|
||||
: "r"(2), // %5
|
||||
"r"(6) // %6
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELXROW_NEON
|
||||
@ -3282,28 +3282,28 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0],%4 \n" // left
|
||||
"ld1 {v0.8b}, [%0],%4 \n" // left
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d1}, [%1],%4 \n"
|
||||
"vsubl.u8 q0, d0, d1 \n"
|
||||
"ld1 {v1.8b}, [%1],%4 \n"
|
||||
"usubl v0.8h, v0.8b, v1.8b \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0],%4 \n" // center * 2
|
||||
"ld1 {v2.8b}, [%0],%4 \n" // center * 2
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1],%4 \n"
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"ld1 {v3.8b}, [%1],%4 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0],%5 \n" // right
|
||||
"ld1 {v2.8b}, [%0],%5 \n" // right
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1],%5 \n"
|
||||
"ld1 {v3.8b}, [%1],%5 \n"
|
||||
"subs %3, %3, #8 \n" // 8 pixels
|
||||
"vsubl.u8 q1, d2, d3 \n"
|
||||
"vadd.s16 q0, q0, q1 \n"
|
||||
"vabs.s16 q0, q0 \n"
|
||||
"vqmovn.u16 d0, q0 \n"
|
||||
"usubl v1.8h, v2.8b, v3.8b \n"
|
||||
"add v0.8h, v0.8h, v1.8h \n"
|
||||
"abs v0.8h, v0.8h \n"
|
||||
"uqxtn v0.8b, v0.8h \n"
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 sobely
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y0), // %0
|
||||
"+r"(src_y1), // %1
|
||||
@ -3311,7 +3311,7 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
"+r"(width) // %3
|
||||
: "r"(1), // %4
|
||||
"r"(6) // %5
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELYROW_NEON
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user