mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
I411ToARGB_NEON and I444ToARGB_NEON port.
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/964009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@467 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
30859f75f2
commit
b883ce6e2c
@ -184,6 +184,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_MERGEUV_NEON
|
||||
#define HAS_YTOARGBROW_NEON
|
||||
#define HAS_I444TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Mips platforms
|
||||
@ -231,11 +233,21 @@ typedef uint32 uvec32[4];
|
||||
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
|
||||
#endif
|
||||
|
||||
void I444ToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I422ToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I411ToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I422ToBGRARow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -757,11 +769,21 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
|
||||
void I444ToARGBRow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I422ToARGBRow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I422ToBGRARow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
|
||||
@ -80,6 +80,13 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I444TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I444ToARGBRow = I444ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I444ToARGBRow = I444ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
@ -185,6 +192,13 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I411TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I411ToARGBRow = I411ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I411ToARGBRow = I411ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
|
||||
@ -61,7 +61,9 @@ YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
|
||||
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
|
||||
#endif // HAS_I422TOARGBROW_SSSE3
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
|
||||
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
|
||||
YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
|
||||
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
|
||||
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
|
||||
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
|
||||
|
||||
@ -467,6 +467,29 @@ static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v,
|
||||
*r = Clip(static_cast<int32>((u * UR + v * VR) - (BR) + y1) >> 6);
|
||||
}
|
||||
|
||||
#if defined(__ARM_NEON__)
|
||||
// C version that mimics the NEON assembly: U and V are averaged over each pair of pixels.
|
||||
// TODO(fbarchard): Remove subsampling from Neon.
|
||||
// Converts one row of Y, U and V samples (one U and one V per Y) to 4-byte
// output pixels, two pixels per loop iteration.
// For each pair, the two adjacent U samples and the two adjacent V samples
// are averaged with rounding, and the same averaged chroma is passed to
// YuvPixel for both Y samples of the pair.
// If width is odd, the final pixel uses its own U and V without averaging.
//   y_buf, u_buf, v_buf: input rows; each is advanced one sample per pixel.
//   rgb_buf: output row; 4 bytes are written per pixel.
//   width: number of pixels to convert.
// NOTE(review): the trailing YuvPixel arguments (24, 16, 8, 0) are presumably
// per-channel bit shifts selecting ARGB byte order - confirm against YuvPixel.
void I444ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
// Rounded average of the two horizontally adjacent chroma samples.
uint8 u = (u_buf[0] + u_buf[1] + 1) >> 1;
|
||||
uint8 v = (v_buf[0] + v_buf[1] + 1) >> 1;
|
||||
YuvPixel(y_buf[0], u, v, rgb_buf + 0, 24, 16, 8, 0);
|
||||
YuvPixel(y_buf[1], u, v, rgb_buf + 4, 24, 16, 8, 0);
|
||||
y_buf += 2;
|
||||
u_buf += 2;
|
||||
v_buf += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
// Odd width: convert the last pixel with its own, unaveraged U and V.
if (width & 1) {
|
||||
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void I444ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -480,7 +503,7 @@ void I444ToARGBRow_C(const uint8* y_buf,
|
||||
rgb_buf += 4; // Advance 1 pixel.
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
// Also used for 420
|
||||
void I422ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
|
||||
@ -24,6 +24,22 @@ extern "C" {
|
||||
"vld1.u32 {d2[0]}, [%1]! \n" \
|
||||
"vld1.u32 {d2[1]}, [%2]! \n"
|
||||
|
||||
// Read 8 Y, 2 U and 2 V from 411, duplicating each U and V so d2 holds 4 U and 4 V
|
||||
#define READYUV411 \
|
||||
"vld1.u8 {d0}, [%0]! \n" \
|
||||
"vld1.u16 {d2[0]}, [%1]! \n" \
|
||||
"vld1.u16 {d2[1]}, [%2]! \n" \
|
||||
"vmov.u8 d3, d2 \n" \
|
||||
"vzip.u8 d2, d3 \n"
|
||||
|
||||
// Read 8 Y, 8 U and 8 V from 444, averaging adjacent U and V pairs down to 4 U and 4 V
|
||||
#define READYUV444 \
|
||||
"vld1.u8 {d0}, [%0]! \n" \
|
||||
"vld1.u8 {d2}, [%1]! \n" \
|
||||
"vld1.u8 {d3}, [%2]! \n" \
|
||||
"vpaddl.u8 q1, q1 \n" \
|
||||
"vrshrn.u16 d2, q1, #1 \n"
|
||||
|
||||
// Read 8 Y, and set 4 U and 4 V to 128
|
||||
#define READYUV400 \
|
||||
"vld1.u8 {d0}, [%0]! \n" \
|
||||
@ -79,6 +95,39 @@ static const vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
|
||||
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
#endif
|
||||
|
||||
#ifdef HAS_I444TOARGBROW_NEON
|
||||
// NEON row converter for I444 input: consumes 8 Y, 8 U and 8 V bytes and
// stores 32 output bytes (8 pixels of 4 bytes) per loop iteration.
// The loop subtracts 8 from width each pass and repeats while the result is
// greater than zero, so the body always runs at least once and the row is
// handled in whole groups of 8 pixels.
//   src_y, src_u, src_v: input rows, post-incremented by the READYUV444 loads.
//   dst_argb: output row, post-incremented by the interleaved store.
//   width: pixel count, decremented by 8 per iteration.
// NOTE(review): the conversion arithmetic lives in the YUV422TORGB macro,
// which is not visible here; it presumably leaves its three colour channels
// in d20-d22 - confirm against the macro definition.
void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// Load 8 bytes of each coefficient table into d24 / d25.
"vld1.u8 {d24}, [%5] \n"
|
||||
"vld1.u8 {d25}, [%6] \n"
|
||||
// Constants kept in registers for the loop: 128, 74 and 16.
"vmov.u8 d26, #128 \n"
|
||||
"vmov.u16 q14, #74 \n"
|
||||
"vmov.u16 q15, #16 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
READYUV444
|
||||
YUV422TORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
// Fourth byte of every output pixel is the constant 255.
"vmov.u8 d23, #255 \n"
|
||||
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"(&kUVToRB), // %5
|
||||
"r"(&kUVToG) // %6
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I444TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -112,6 +161,39 @@ void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
}
|
||||
#endif // HAS_I422TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_I411TOARGBROW_NEON
|
||||
// NEON row converter for I411 input: consumes 8 Y, 2 U and 2 V bytes and
// stores 32 output bytes (8 pixels of 4 bytes) per loop iteration.
// READYUV411 duplicates each loaded U and V byte before the shared
// YUV422TORGB step runs.
// The loop subtracts 8 from width each pass and repeats while the result is
// greater than zero, so the body always runs at least once and the row is
// handled in whole groups of 8 pixels.
//   src_y, src_u, src_v: input rows, post-incremented by the READYUV411 loads.
//   dst_argb: output row, post-incremented by the interleaved store.
//   width: pixel count, decremented by 8 per iteration.
// NOTE(review): the conversion arithmetic lives in the YUV422TORGB macro,
// which is not visible here; it presumably leaves its three colour channels
// in d20-d22 - confirm against the macro definition.
void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// Load 8 bytes of each coefficient table into d24 / d25.
"vld1.u8 {d24}, [%5] \n"
|
||||
"vld1.u8 {d25}, [%6] \n"
|
||||
// Constants kept in registers for the loop: 128, 74 and 16.
"vmov.u8 d26, #128 \n"
|
||||
"vmov.u16 q14, #74 \n"
|
||||
"vmov.u16 q15, #16 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
READYUV411
|
||||
YUV422TORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
// Fourth byte of every output pixel is the constant 255.
"vmov.u8 d23, #255 \n"
|
||||
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"(&kUVToRB), // %5
|
||||
"r"(&kUVToG) // %6
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I411TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_I422TOBGRAROW_NEON
|
||||
void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user