mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
YUY2ToARGB_NEON in one step
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/964010 git-svn-id: http://libyuv.googlecode.com/svn/trunk@468 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
b883ce6e2c
commit
793e5a06ff
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 466
|
||||
Version: 468
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -92,6 +92,8 @@ extern "C" {
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TORGB565ROW_SSSE3
|
||||
#define HAS_YUY2TOARGBROW_SSSE3
|
||||
#define HAS_UYVYTOARGBROW_SSSE3
|
||||
|
||||
// Effects
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
@ -163,6 +165,8 @@ extern "C" {
|
||||
#define HAS_MIRRORROWUV_NEON
|
||||
#define HAS_NV12TOARGBROW_NEON
|
||||
#define HAS_NV21TOARGBROW_NEON
|
||||
#define HAS_YUY2TOARGBROW_NEON
|
||||
#define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_NV12TORGB565ROW_NEON
|
||||
#define HAS_NV21TORGB565ROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
@ -304,6 +308,12 @@ void NV21ToRGB565Row_NEON(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void YUY2ToARGBRow_NEON(const uint8* yuy2_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void UYVYToARGBRow_NEON(const uint8* uyvy_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
@ -501,6 +511,12 @@ void NV21ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void YUY2ToARGBRow_C(const uint8* yuy2_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void UYVYToARGBRow_C(const uint8* uyvy_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void I422ToBGRARow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -576,6 +592,12 @@ void NV21ToRGB565Row_SSSE3(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -641,6 +663,12 @@ void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* yuy2_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* uyvy_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -687,6 +715,12 @@ void NV21ToRGB565Row_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void YUY2ToARGBRow_Any_SSSE3(const uint8* yuy2_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void UYVYToARGBRow_Any_SSSE3(const uint8* uyvy_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void I422ToBGRARow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -840,6 +874,12 @@ void NV21ToRGB565Row_Any_NEON(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void YUY2ToARGBRow_Any_NEON(const uint8* yuy2_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void UYVYToARGBRow_Any_NEON(const uint8* uyvy_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 466
|
||||
#define LIBYUV_VERSION 468
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -740,79 +740,28 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
|
||||
src_stride_yuy2 = -src_stride_yuy2;
|
||||
}
|
||||
void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
int pix) = YUY2ToUV422Row_C;
|
||||
void (*YUY2ToYRow)(const uint8* src_yuy2,
|
||||
uint8* dst_y, int pix) = YUY2ToYRow_C;
|
||||
#if defined(HAS_YUY2TOYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
if (width > 16) {
|
||||
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
|
||||
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
|
||||
}
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
|
||||
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
|
||||
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
|
||||
YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
|
||||
YUY2ToYRow = YUY2ToYRow_SSE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_YUY2TOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
if (width > 8) {
|
||||
YUY2ToYRow = YUY2ToYRow_Any_NEON;
|
||||
if (width > 16) {
|
||||
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
|
||||
}
|
||||
}
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
YUY2ToYRow = YUY2ToYRow_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
YUY2ToUV422Row = YUY2ToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
|
||||
YUY2ToARGBRow_C;
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I422TOARGBROW_NEON)
|
||||
#elif defined(HAS_YUY2TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
YUY2ToUV422Row(src_yuy2, rowu, rowv, width);
|
||||
YUY2ToYRow(src_yuy2, rowy, width);
|
||||
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
|
||||
YUY2ToARGBRow(src_yuy2, dst_argb, width);
|
||||
src_yuy2 += src_stride_yuy2;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
@ -834,63 +783,28 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
|
||||
src_stride_uyvy = -src_stride_uyvy;
|
||||
}
|
||||
void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
int pix) = UYVYToUV422Row_C;
|
||||
void (*UYVYToYRow)(const uint8* src_uyvy,
|
||||
uint8* dst_y, int pix) = UYVYToYRow_C;
|
||||
#if defined(HAS_UYVYTOYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
if (width > 16) {
|
||||
UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
|
||||
UYVYToYRow = UYVYToYRow_Any_SSE2;
|
||||
}
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
|
||||
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
|
||||
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
|
||||
UYVYToUV422Row = UYVYToUV422Row_SSE2;
|
||||
UYVYToYRow = UYVYToYRow_SSE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
|
||||
UYVYToARGBRow_C;
|
||||
#if defined(HAS_UYVYTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
|
||||
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
UYVYToARGBRow = UYVYToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I422TOARGBROW_NEON)
|
||||
#elif defined(HAS_UYVYTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
UYVYToARGBRow = UYVYToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
UYVYToUV422Row(src_uyvy, rowu, rowv, width);
|
||||
UYVYToYRow(src_uyvy, rowy, width);
|
||||
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
|
||||
UYVYToARGBRow(src_uyvy, dst_argb, width);
|
||||
src_uyvy += src_stride_uyvy;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
|
||||
@ -114,6 +114,7 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
|
||||
#endif // HAS_NV12TORGB565ROW_NEON
|
||||
#undef NVANY
|
||||
|
||||
// YUY2 to RGB does 8 at a time.
|
||||
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
|
||||
// SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
|
||||
// SSE2 RGB565 is multiple of 4 pixels, ARGB must be aligned to 16 bytes.
|
||||
@ -141,6 +142,10 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
|
||||
3, 4, 2)
|
||||
RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
|
||||
7, 1, 4)
|
||||
RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
|
||||
7, 2, 4)
|
||||
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
|
||||
7, 2, 4)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_NEON)
|
||||
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
|
||||
@ -153,10 +158,15 @@ RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
|
||||
7, 4, 2)
|
||||
RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
|
||||
7, 1, 4)
|
||||
RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
|
||||
7, 2, 4)
|
||||
RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
|
||||
7, 2, 4)
|
||||
#endif
|
||||
#undef RGBANY
|
||||
|
||||
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
|
||||
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
|
||||
#define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \
|
||||
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
|
||||
ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
|
||||
|
||||
@ -801,6 +801,34 @@ void NV21ToRGB565Row_C(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
void YUY2ToARGBRow_C(const uint8* yuy2_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0);
|
||||
YuvPixel(yuy2_buf[2], yuy2_buf[1], yuy2_buf[3], rgb_buf + 4, 24, 16, 8, 0);
|
||||
yuy2_buf += 4;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void UYVYToARGBRow_C(const uint8* uyvy_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0);
|
||||
YuvPixel(uyvy_buf[3], uyvy_buf[0], uyvy_buf[2], rgb_buf + 4, 24, 16, 8, 0);
|
||||
uyvy_buf += 4;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToBGRARow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -1402,6 +1430,7 @@ void I422ToARGB4444Row_SSSE3(const uint8* y_buf,
|
||||
I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
|
||||
ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
|
||||
}
|
||||
|
||||
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
@ -1420,6 +1449,50 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y,
|
||||
ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
|
||||
}
|
||||
|
||||
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||
YUY2ToUV422Row_SSE2(src_yuy2, rowu, rowv, width);
|
||||
YUY2ToYRow_SSE2(src_yuy2, rowy, width);
|
||||
I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width);
|
||||
}
|
||||
|
||||
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||
YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, rowu, rowv, width);
|
||||
YUY2ToYRow_Unaligned_SSE2(src_yuy2, rowy, width);
|
||||
I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width);
|
||||
}
|
||||
|
||||
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||
UYVYToUV422Row_SSE2(src_uyvy, rowu, rowv, width);
|
||||
UYVYToYRow_SSE2(src_uyvy, rowy, width);
|
||||
I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width);
|
||||
}
|
||||
|
||||
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||
UYVYToUV422Row_Unaligned_SSE2(src_uyvy, rowu, rowv, width);
|
||||
UYVYToYRow_Unaligned_SSE2(src_uyvy, rowy, width);
|
||||
I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width);
|
||||
}
|
||||
|
||||
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
|
||||
#endif // !defined(YUV_DISABLE_ASM)
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@ extern "C" {
|
||||
"vld1.u8 {d2}, [%1]! \n" \
|
||||
"vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
|
||||
"vuzp.u8 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
// Read 8 Y and 4 VU from NV21
|
||||
#define READNV21 \
|
||||
@ -59,7 +59,22 @@ extern "C" {
|
||||
"vld1.u8 {d2}, [%1]! \n" \
|
||||
"vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
|
||||
"vuzp.u8 d3, d2 \n" \
|
||||
"vtrn.u32 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
// Read 8 YUY2 pixels (16 bytes) from [%0], post-incrementing the pointer.
// Result: d0 holds the 8 Y bytes; d2/d3 hold the de-interleaved U and V
// samples in the arrangement the following conversion macro consumes.
#define READYUY2                                                               \
    "vld2.u8 {d0, d2}, [%0]! \n"/* even bytes (Y) -> d0, odd (U/V) -> d2 */    \
    "vmov.u8 d3, d2 \n"/* duplicate the interleaved U/V bytes */               \
    "vuzp.u8 d2, d3 \n"/* split odd/even uv apart */                           \
    "vtrn.u32 d2, d3 \n"/* swap 32-bit halves between d2 and d3 */
|
||||
|
||||
// Read 8 UYVY pixels (16 bytes) from [%0], post-incrementing the pointer.
// Result: d0 holds the 8 Y bytes; d2/d3 hold the de-interleaved U and V
// samples in the arrangement the following conversion macro consumes.
#define READUYVY                                                               \
    "vld2.u8 {d2, d3}, [%0]! \n"/* even bytes (U/V) -> d2, odd (Y) -> d3 */    \
    "vmov.u8 d0, d3 \n"/* move Y to d0 */                                      \
    "vmov.u8 d3, d2 \n"/* duplicate the interleaved U/V bytes */               \
    "vuzp.u8 d2, d3 \n"/* split odd/even uv apart */                           \
    "vtrn.u32 d2, d3 \n"/* swap 32-bit halves between d2 and d3 */
|
||||
|
||||
#define YUV422TORGB \
|
||||
"veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\
|
||||
@ -674,6 +689,64 @@ void NV21ToRGB565Row_NEON(const uint8* src_y,
|
||||
}
|
||||
#endif // HAS_NV21TORGB565ROW_NEON
|
||||
|
||||
#ifdef HAS_YUY2TOARGBROW_NEON
// Converts a row of packed YUY2 directly to 4-byte pixels with NEON.
//
// Each loop iteration reads 8 source pixels (READYUY2), converts them
// (YUV422TORGB) and stores 32 output bytes. The loop body runs before the
// remaining count is tested, so at least 8 pixels are always processed;
// presumably callers pass a positive multiple of 8 - confirm.
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    /* Constants used by YUV422TORGB. */
    "vld1.u8 {d24}, [%3] \n"    /* d24 = 8 bytes from kUVToRB */
    "vld1.u8 {d25}, [%4] \n"    /* d25 = 8 bytes from kUVToG */
    "vmov.u8 d26, #128 \n"      /* bias that YUV422TORGB removes from u/v */
    "vmov.u16 q14, #74 \n"      /* presumably the Y scale - confirm */
    "vmov.u16 q15, #16 \n"      /* presumably the Y offset - confirm */
    ".p2align 2 \n"
  "1: \n"
    READYUY2
    YUV422TORGB
    "subs %2, %2, #8 \n"        /* 8 pixels consumed per iteration */
    "vmov.u8 d23, #255 \n"      /* 4th channel; presumably opaque alpha */
    "vst4.8 {d20, d21, d22, d23}, [%1]! \n"  /* interleaved store, 32 bytes */
    "bgt 1b \n"                 /* repeat while pixels remain */
    : "+r"(src_yuy2),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
    : "r"(&kUVToRB),   // %3
      "r"(&kUVToG)     // %4
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_YUY2TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_UYVYTOARGBROW_NEON
// Converts a row of packed UYVY directly to 4-byte pixels with NEON.
//
// Each loop iteration reads 8 source pixels (READUYVY), converts them
// (YUV422TORGB) and stores 32 output bytes. The loop body runs before the
// remaining count is tested, so at least 8 pixels are always processed;
// presumably callers pass a positive multiple of 8 - confirm.
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    /* Constants used by YUV422TORGB. */
    "vld1.u8 {d24}, [%3] \n"    /* d24 = 8 bytes from kUVToRB */
    "vld1.u8 {d25}, [%4] \n"    /* d25 = 8 bytes from kUVToG */
    "vmov.u8 d26, #128 \n"      /* bias that YUV422TORGB removes from u/v */
    "vmov.u16 q14, #74 \n"      /* presumably the Y scale - confirm */
    "vmov.u16 q15, #16 \n"      /* presumably the Y offset - confirm */
    ".p2align 2 \n"
  "1: \n"
    READUYVY
    YUV422TORGB
    "subs %2, %2, #8 \n"        /* 8 pixels consumed per iteration */
    "vmov.u8 d23, #255 \n"      /* 4th channel; presumably opaque alpha */
    "vst4.8 {d20, d21, d22, d23}, [%1]! \n"  /* interleaved store, 32 bytes */
    "bgt 1b \n"                 /* repeat while pixels remain */
    : "+r"(src_uyvy),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
    : "r"(&kUVToRB),   // %3
      "r"(&kUVToG)     // %4
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_UYVYTOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_SPLITUV_NEON
|
||||
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
|
||||
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user