mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
On Neon remove aligned SplitUVRow
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/930020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@493 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
cb5262dbc5
commit
4a86a836fc
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 492
|
Version: 493
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -524,8 +524,6 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
|||||||
int pix);
|
int pix);
|
||||||
void SplitUVRow_Unaligned_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
void SplitUVRow_Unaligned_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||||
int pix);
|
int pix);
|
||||||
void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
|
||||||
int pix);
|
|
||||||
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
|
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
|
||||||
uint8* dst_v, int pix);
|
uint8* dst_v, int pix);
|
||||||
void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||||
@ -549,8 +547,6 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
|
|||||||
uint8* dst_uv, int width);
|
uint8* dst_uv, int width);
|
||||||
void MergeUVRow_Unaligned_AVX2(const uint8* src_u, const uint8* src_v,
|
void MergeUVRow_Unaligned_AVX2(const uint8* src_u, const uint8* src_v,
|
||||||
uint8* dst_uv, int width);
|
uint8* dst_uv, int width);
|
||||||
void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
|
|
||||||
uint8* dst_uv, int width);
|
|
||||||
void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||||
int width);
|
int width);
|
||||||
void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 492
|
#define LIBYUV_VERSION 493
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -397,14 +397,9 @@ static int X420ToI420(const uint8* src_y,
|
|||||||
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
||||||
SplitUVRow = SplitUVRow_Any_NEON;
|
SplitUVRow = SplitUVRow_Any_NEON;
|
||||||
if (IS_ALIGNED(halfwidth, 16)) {
|
if (IS_ALIGNED(halfwidth, 16)) {
|
||||||
SplitUVRow = SplitUVRow_Unaligned_NEON;
|
|
||||||
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
|
|
||||||
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
|
|
||||||
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
|
|
||||||
SplitUVRow = SplitUVRow_NEON;
|
SplitUVRow = SplitUVRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
|
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
|
||||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
|
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
|
||||||
|
|||||||
@ -551,14 +551,9 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
|
|||||||
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
||||||
MergeUVRow_ = MergeUVRow_Any_NEON;
|
MergeUVRow_ = MergeUVRow_Any_NEON;
|
||||||
if (IS_ALIGNED(halfwidth, 16)) {
|
if (IS_ALIGNED(halfwidth, 16)) {
|
||||||
MergeUVRow_ = MergeUVRow_Unaligned_NEON;
|
|
||||||
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
|
|
||||||
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
|
|
||||||
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
|
|
||||||
MergeUVRow_ = MergeUVRow_NEON;
|
MergeUVRow_ = MergeUVRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||||
|
|||||||
@ -276,12 +276,9 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
|||||||
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
||||||
MergeUVRow_ = MergeUVRow_Any_NEON;
|
MergeUVRow_ = MergeUVRow_Any_NEON;
|
||||||
if (IS_ALIGNED(halfwidth, 16)) {
|
if (IS_ALIGNED(halfwidth, 16)) {
|
||||||
MergeUVRow_ = MergeUVRow_Unaligned_NEON;
|
|
||||||
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
|
|
||||||
MergeUVRow_ = MergeUVRow_NEON;
|
MergeUVRow_ = MergeUVRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
|
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
|
||||||
@ -385,12 +382,9 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
|||||||
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
|
||||||
MergeUVRow_ = MergeUVRow_Any_NEON;
|
MergeUVRow_ = MergeUVRow_Any_NEON;
|
||||||
if (IS_ALIGNED(halfwidth, 16)) {
|
if (IS_ALIGNED(halfwidth, 16)) {
|
||||||
MergeUVRow_ = MergeUVRow_Unaligned_NEON;
|
|
||||||
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
|
|
||||||
MergeUVRow_ = MergeUVRow_NEON;
|
MergeUVRow_ = MergeUVRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
|
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
|
||||||
|
|||||||
@ -312,7 +312,7 @@ SplitUVRowANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
|
|||||||
SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31)
|
SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_SPLITUVROW_NEON
|
#ifdef HAS_SPLITUVROW_NEON
|
||||||
SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_Unaligned_NEON, SplitUVRow_C, 15)
|
SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_SPLITUVROW_MIPS_DSPR2
|
#ifdef HAS_SPLITUVROW_MIPS_DSPR2
|
||||||
SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
|
SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
|
||||||
@ -338,7 +338,7 @@ MergeUVRow_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
|
|||||||
MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31)
|
MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_MERGEUVROW_NEON
|
#ifdef HAS_MERGEUVROW_NEON
|
||||||
MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_Unaligned_NEON, MergeUVRow_C, 15)
|
MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
|
||||||
#endif
|
#endif
|
||||||
#undef MergeUVRow_ANY
|
#undef MergeUVRow_ANY
|
||||||
|
|
||||||
|
|||||||
@ -748,33 +748,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
|||||||
#endif // HAS_UYVYTOARGBROW_NEON
|
#endif // HAS_UYVYTOARGBROW_NEON
|
||||||
|
|
||||||
#ifdef HAS_SPLITUVROW_NEON
|
#ifdef HAS_SPLITUVROW_NEON
|
||||||
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
|
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
|
||||||
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
|
|
||||||
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
".p2align 2 \n"
|
".p2align 2 \n"
|
||||||
"1: \n"
|
|
||||||
"vld2.u8 {q0, q1}, [%0:128]! \n" // load 16 pairs of UV
|
|
||||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
|
||||||
"vst1.u8 {q0}, [%1:128]! \n" // store U
|
|
||||||
"vst1.u8 {q1}, [%2:128]! \n" // store V
|
|
||||||
"bgt 1b \n"
|
|
||||||
: "+r"(src_uv), // %0
|
|
||||||
"+r"(dst_u), // %1
|
|
||||||
"+r"(dst_v), // %2
|
|
||||||
"+r"(width) // %3 // Output registers
|
|
||||||
: // Input registers
|
|
||||||
: "memory", "cc", "q0", "q1" // Clobber List
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
|
|
||||||
// Alignment requirement: Multiple of 16 pixels, pointers unaligned.
|
|
||||||
void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
|
||||||
int width) {
|
|
||||||
asm volatile (
|
|
||||||
".p2align 2 \n"
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld2.u8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
|
"vld2.u8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
|
||||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||||
@ -793,32 +771,10 @@ void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
|||||||
|
|
||||||
#ifdef HAS_MERGEUVROW_NEON
|
#ifdef HAS_MERGEUVROW_NEON
|
||||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||||
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
|
|
||||||
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
".p2align 2 \n"
|
".p2align 2 \n"
|
||||||
"1: \n"
|
|
||||||
"vld1.u8 {q0}, [%0:128]! \n" // load U
|
|
||||||
"vld1.u8 {q1}, [%1:128]! \n" // load V
|
|
||||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
|
||||||
"vst2.u8 {q0, q1}, [%2:128]! \n" // store 16 pairs of UV
|
|
||||||
"bgt 1b \n"
|
|
||||||
:
|
|
||||||
"+r"(src_u), // %0
|
|
||||||
"+r"(src_v), // %1
|
|
||||||
"+r"(dst_uv), // %2
|
|
||||||
"+r"(width) // %3 // Output registers
|
|
||||||
: // Input registers
|
|
||||||
: "memory", "cc", "q0", "q1" // Clobber List
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
|
||||||
void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
|
|
||||||
uint8* dst_uv, int width) {
|
|
||||||
asm volatile (
|
|
||||||
".p2align 2 \n"
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.u8 {q0}, [%0]! \n" // load U
|
"vld1.u8 {q0}, [%0]! \n" // load U
|
||||||
"vld1.u8 {q1}, [%1]! \n" // load V
|
"vld1.u8 {q1}, [%1]! \n" // load V
|
||||||
|
|||||||
@ -1017,7 +1017,8 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
|||||||
int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
|
int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
|
||||||
int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
|
int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
|
||||||
for (int i = 0; i < dst_height; ++i) {
|
for (int i = 0; i < dst_height; ++i) {
|
||||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, dx);
|
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
|
||||||
|
dx);
|
||||||
dst_argb += dst_stride;
|
dst_argb += dst_stride;
|
||||||
y += dy;
|
y += dy;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user