mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 09:16:48 +08:00
remove I422ToBGRA and use I422ToRGBA internally
Removes the low-level row functions for I420ToBGRA and I420ToRAW and reimplements them using I420ToRGBA and I420ToRGB24 with U and V swapped and a transposed color matrix. Adds unit tests that compare a 1-step conversion against a 2-step conversion to test that the end-swapping versions match the direct conversions. R=harryjin@google.com BUG=libyuv:518 Review URL: https://codereview.chromium.org/1427993004 .
This commit is contained in:
parent
5d97b93369
commit
2c7aa0070a
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1527
|
||||
Version: 1528
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -103,7 +103,6 @@ extern "C" {
|
||||
#define HAS_BGRATOYROW_SSSE3
|
||||
#define HAS_COPYROW_ERMS
|
||||
#define HAS_COPYROW_SSE2
|
||||
#define HAS_H422TOABGRROW_SSSE3
|
||||
#define HAS_H422TOARGBROW_SSSE3
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
@ -115,8 +114,6 @@ extern "C" {
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOBGRAROW_SSSE3
|
||||
#define HAS_I422TORAWROW_SSSE3
|
||||
#define HAS_I422TORGB24ROW_SSSE3
|
||||
#define HAS_I422TORGB565ROW_SSSE3
|
||||
#define HAS_I422TORGBAROW_SSSE3
|
||||
@ -124,7 +121,6 @@ extern "C" {
|
||||
#define HAS_I422TOYUY2ROW_SSE2
|
||||
#define HAS_I444TOARGBROW_SSSE3
|
||||
#define HAS_J400TOARGBROW_SSE2
|
||||
#define HAS_J422TOABGRROW_SSSE3
|
||||
#define HAS_J422TOARGBROW_SSSE3
|
||||
#define HAS_MERGEUVROW_SSE2
|
||||
#define HAS_MIRRORROW_SSSE3
|
||||
@ -198,7 +194,6 @@ extern "C" {
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_COPYROW_AVX
|
||||
#define HAS_H422TOABGRROW_AVX2
|
||||
#define HAS_H422TOARGBROW_AVX2
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#if !(defined(_DEBUG) && defined(__i386__))
|
||||
@ -207,12 +202,9 @@ extern "C" {
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#endif
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
#define HAS_I422TOBGRAROW_AVX2
|
||||
#define HAS_I422TORAWROW_AVX2
|
||||
#define HAS_I422TORGB24ROW_AVX2
|
||||
#define HAS_I422TORGBAROW_AVX2
|
||||
#define HAS_INTERPOLATEROW_AVX2
|
||||
#define HAS_J422TOABGRROW_AVX2
|
||||
#define HAS_J422TOARGBROW_AVX2
|
||||
#define HAS_MERGEUVROW_AVX2
|
||||
#define HAS_MIRRORROW_AVX2
|
||||
@ -295,8 +287,6 @@ extern "C" {
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
#define HAS_I422TOBGRAROW_NEON
|
||||
#define HAS_I422TORAWROW_NEON
|
||||
#define HAS_I422TORGB24ROW_NEON
|
||||
#define HAS_I422TORGB565ROW_NEON
|
||||
#define HAS_I422TORGBAROW_NEON
|
||||
@ -359,7 +349,6 @@ extern "C" {
|
||||
#define HAS_COPYROW_MIPS
|
||||
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
#define HAS_I422TOARGBROW_MIPS_DSPR2
|
||||
#define HAS_I422TOBGRAROW_MIPS_DSPR2
|
||||
#define HAS_INTERPOLATEROW_MIPS_DSPR2
|
||||
#define HAS_MIRRORROW_MIPS_DSPR2
|
||||
#define HAS_MIRRORUVROW_MIPS_DSPR2
|
||||
@ -574,12 +563,6 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -592,12 +575,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
uint8* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1075,12 +1052,6 @@ void UYVYToARGBRow_C(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1093,12 +1064,6 @@ void I422ToRGB24Row_C(const uint8* src_y,
|
||||
uint8* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGB4444Row_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1129,12 +1094,6 @@ void I422ToARGBRow_AVX2(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1249,12 +1208,6 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1309,30 +1262,12 @@ void I422ToRGB24Row_AVX2(const uint8* src_y,
|
||||
uint8* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1429,12 +1364,6 @@ void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1489,18 +1418,6 @@ void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
|
||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
|
||||
@ -1615,12 +1532,6 @@ void I411ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1633,12 +1544,6 @@ void I422ToRGB24Row_Any_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRAWRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1686,24 +1591,12 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
|
||||
void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1527
|
||||
#define LIBYUV_VERSION 1528
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -450,83 +450,13 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to BGRA.
|
||||
LIBYUV_API
|
||||
int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToBGRARow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToBGRARow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_bgra ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
|
||||
dst_stride_bgra = -dst_stride_bgra;
|
||||
}
|
||||
#if defined(HAS_I422TOBGRAROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToBGRARow = I422ToBGRARow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOBGRAROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToBGRARow = I422ToBGRARow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToBGRARow = I422ToBGRARow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOBGRAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToBGRARow = I422ToBGRARow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToBGRARow = I422ToBGRARow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
|
||||
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvIConstants, width);
|
||||
dst_bgra += dst_stride_bgra;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 to RGBA.
|
||||
LIBYUV_API
|
||||
int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height) {
|
||||
// Convert I420 to RGBA with matrix
|
||||
static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToRGBARow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
@ -568,9 +498,18 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGBAROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
|
||||
I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvIConstants, width);
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
|
||||
dst_rgba += dst_stride_rgba;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -581,13 +520,43 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 to RGB24.
|
||||
// Convert I420 to RGBA.
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height) {
|
||||
int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height) {
|
||||
return I420ToRGBAMatrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_rgba, dst_stride_rgba,
|
||||
&kYuvIConstants,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to BGRA.
|
||||
LIBYUV_API
|
||||
int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height) {
|
||||
return I420ToRGBAMatrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_bgra, dst_stride_bgra,
|
||||
&kYvuIConstants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to RGB24 with matrix
|
||||
static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToRGB24Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
@ -631,7 +600,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvIConstants, width);
|
||||
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
|
||||
dst_rgb24 += dst_stride_rgb24;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -642,65 +611,34 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 to RGB24.
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_rgb24, dst_stride_rgb24,
|
||||
&kYuvIConstants,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to RAW.
|
||||
LIBYUV_API
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToRAWRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRAWRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_raw ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_raw = dst_raw + (height - 1) * dst_stride_raw;
|
||||
dst_stride_raw = -dst_stride_raw;
|
||||
}
|
||||
#if defined(HAS_I422TORAWROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRAWRow = I422ToRAWRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORAWROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToRAWRow = I422ToRAWRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORAWROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRAWRow = I422ToRAWRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvIConstants, width);
|
||||
dst_raw += dst_stride_raw;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_raw, dst_stride_raw,
|
||||
&kYvuIConstants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to ARGB1555.
|
||||
|
||||
@ -764,92 +764,13 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I422 to BGRA.
|
||||
LIBYUV_API
|
||||
int I422ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToBGRARow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToBGRARow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_bgra ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
|
||||
dst_stride_bgra = -dst_stride_bgra;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width &&
|
||||
dst_stride_bgra == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
|
||||
}
|
||||
#if defined(HAS_I422TOBGRAROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToBGRARow = I422ToBGRARow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOBGRAROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToBGRARow = I422ToBGRARow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToBGRARow = I422ToBGRARow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOBGRAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToBGRARow = I422ToBGRARow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToBGRARow = I422ToBGRARow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
|
||||
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvIConstants, width);
|
||||
dst_bgra += dst_stride_bgra;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I422 to RGBA.
|
||||
LIBYUV_API
|
||||
int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height) {
|
||||
// Convert I422 to RGBA with matrix
|
||||
static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToRGBARow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
@ -857,8 +778,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGBARow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_rgba ||
|
||||
if (!src_y || !src_u || !src_v || !dst_rgba ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -868,23 +788,6 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
|
||||
dst_stride_rgba = -dst_stride_rgba;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width &&
|
||||
dst_stride_rgba == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
|
||||
}
|
||||
#if defined(HAS_I422TORGBAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I422ToRGBARow = I422ToRGBARow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRGBARow = I422ToRGBARow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGBAROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
|
||||
@ -901,9 +804,26 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGBAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToRGBARow = I422ToRGBARow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRGBARow = I422ToRGBARow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGBAROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
|
||||
I422ToRGBARow = I422ToRGBARow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvIConstants, width);
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
|
||||
dst_rgba += dst_stride_rgba;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -912,6 +832,36 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I422 to RGBA.
|
||||
LIBYUV_API
|
||||
int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height) {
|
||||
return I422ToRGBAMatrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_rgba, dst_stride_rgba,
|
||||
&kYuvIConstants,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I422 to BGRA.
|
||||
LIBYUV_API
|
||||
int I422ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height) {
|
||||
return I422ToRGBAMatrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_bgra, dst_stride_bgra,
|
||||
&kYvuIConstants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert NV12 to RGB565.
|
||||
LIBYUV_API
|
||||
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
|
||||
@ -115,26 +115,18 @@ ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
|
||||
ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
|
||||
ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
|
||||
ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
|
||||
#endif // HAS_I444TOARGBROW_SSSE3
|
||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORAWROW_AVX2
|
||||
ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||
ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGBAROW_AVX2
|
||||
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
@ -157,10 +149,8 @@ ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
|
||||
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
|
||||
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
|
||||
ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
|
||||
ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
|
||||
|
||||
@ -1443,29 +1443,6 @@ void I422ToRGB24Row_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToRAWRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 5, rgb_buf + 4, rgb_buf + 3, yuvconstants);
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 6; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToARGB4444Row_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1752,32 +1729,6 @@ void UYVYToARGBRow_C(const uint8* src_uyvy,
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToBGRARow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants);
|
||||
rgb_buf[0] = 255;
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 7, rgb_buf + 6, rgb_buf + 5, yuvconstants);
|
||||
rgb_buf[4] = 255;
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants);
|
||||
rgb_buf[0] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToRGBARow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -2629,29 +2580,6 @@ void I422ToRGB24Row_AVX2(const uint8* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422TORAWROW_AVX2)
|
||||
void I422ToRAWRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||
// TODO(fbarchard): ARGBToRAWRow_AVX2
|
||||
ARGBToRAWRow_SSSE3(row, dst_raw, twidth);
|
||||
src_y += twidth;
|
||||
src_u += twidth / 2;
|
||||
src_v += twidth / 2;
|
||||
dst_raw += twidth * 3;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_NV12TORGB565ROW_AVX2)
|
||||
void NV12ToRGB565Row_AVX2(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
|
||||
@ -1663,50 +1663,6 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// I422 row to RAW (3 bytes per pixel) using SSSE3 inline assembly.
// Each loop iteration subtracts 8 from width and advances dst_raw by 0x18
// (24) bytes, i.e. 8 pixels per iteration; the loop repeats while the
// remaining width is > 0 (jg).
// NOTE(review): presumably width must be a multiple of 8 for the stores to
// stay in bounds - confirm against the caller / _Any wrapper.
// v_buf is rewritten as an offset relative to u_buf (sub u_buf, v_buf) before
// the loop; READYUV422 and YUVTORGB are macros defined elsewhere.
// xmm5/xmm6 hold the two shuffle masks used to pack the interleaved pixels
// into 3-byte groups. On i386 PIC builds width is constrained to memory
// ("+m") and decremented with subl.
void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
|
||||
"movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(yuvconstants)
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpcklbw %%xmm2,%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpckhwd %%xmm2,%%xmm1 \n"
|
||||
"pshufb %%xmm5,%%xmm0 \n"
|
||||
"pshufb %%xmm6,%%xmm1 \n"
|
||||
"palignr $0xc,%%xmm0,%%xmm1 \n"
|
||||
"movq %%xmm0," MEMACCESS([dst_raw]) " \n"
|
||||
"movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
|
||||
"lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
|
||||
"subl $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_raw]"+r"(dst_raw), // %[dst_raw]
|
||||
#if defined(__i386__) && defined(__pic__)
|
||||
[width]"+m"(width) // %[width]
|
||||
#else
|
||||
[width]"+rm"(width) // %[width]
|
||||
#endif
|
||||
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
|
||||
[kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
|
||||
[kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -1892,60 +1848,6 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// I422 row to BGRA using SSSE3 inline assembly.
// Each loop iteration subtracts 8 from width (8 pixels per iteration) and
// repeats while the remaining width is > 0 (jg).
// v_buf is rewritten as an offset relative to u_buf before the loop, and xmm5
// is set to all ones by pcmpeqb (presumably used as the alpha channel by
// STOREBGRA - that macro is defined elsewhere, confirm there).
// READYUV422, YUVTORGB and STOREBGRA are macros defined elsewhere; the store
// and the dst_bgra advance happen inside STOREBGRA.
void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREBGRA
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
// I422 row to ABGR using SSSE3 inline assembly.
// Each loop iteration subtracts 8 from width (8 pixels per iteration) and
// repeats while the remaining width is > 0 (jg).
// v_buf is rewritten as an offset relative to u_buf before the loop, and xmm5
// is set to all ones by pcmpeqb (presumably used as the alpha channel by
// STOREABGR - that macro is defined elsewhere, confirm there).
// READYUV422, YUVTORGB and STOREABGR are macros defined elsewhere; the store
// and the dst_abgr advance happen inside STOREABGR.
void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREABGR
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -2088,48 +1990,6 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
"vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \
|
||||
"lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n"
|
||||
|
||||
#if defined(HAS_I422TOBGRAROW_AVX2)
|
||||
// I422 row to BGRA using AVX2 inline assembly.
// Each loop iteration stores two 32-byte vectors, advances dst_bgra by 0x40
// (64) bytes and subtracts 0x10 (16) from width; the loop repeats while the
// remaining width is > 0 (jg). vzeroupper is issued once after the loop.
// v_buf is rewritten as an offset relative to u_buf before the loop and ymm5
// is set to all ones, which is woven in as the alpha bytes in step 3.
// The channel labels in the step 3 comments describe the byte order written
// to memory. READYUV422_AVX2 and YUVTORGB_AVX2 are macros defined elsewhere.
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
|
||||
void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
|
||||
// Step 3: Weave into BGRA
|
||||
"vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
|
||||
"vpermq $0xd8,%%ymm1,%%ymm1 \n"
|
||||
"vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR
|
||||
"vpermq $0xd8,%%ymm2,%%ymm2 \n"
|
||||
"vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels
|
||||
"vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels
|
||||
"vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n"
|
||||
"vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n"
|
||||
"lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n"
|
||||
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
|
||||
}
|
||||
#endif // HAS_I422TOBGRAROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
@ -2199,38 +2059,6 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I422ALPHATOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
// I422 row to ABGR using AVX2 inline assembly.
// Each loop iteration subtracts 0x10 (16) from width and repeats while the
// remaining width is > 0 (jg); vzeroupper is issued once after the loop.
// v_buf is rewritten as an offset relative to u_buf before the loop and ymm5
// is set to all ones (presumably the alpha source for STOREABGR_AVX2 - that
// macro is defined elsewhere, confirm there).
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
|
||||
void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREABGR_AVX2
|
||||
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
|
||||
}
|
||||
#endif // HAS_I422TOABGRROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TORGBAROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
|
||||
|
||||
@ -715,70 +715,6 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// I422 row to BGRA using MIPS DSPR2 inline assembly.
// Each loop iteration adds -4 to width, advances y_buf by 4 and stores four
// 32-bit words (rgb_buf advances by 16), i.e. 4 pixels per iteration.
// The loop exits only when width reaches exactly zero (bnez), and a zero
// width skips the loop entirely (beqz).
// NOTE(review): presumably width must be a positive multiple of 4 - confirm
// against the caller / _Any wrapper.
// The conversion coefficients are loaded as immediates with repl.ph;
// yuvconstants is not among the asm operands, so it is not read here.
// The block runs under ".set noreorder", so the instruction written directly
// after each branch occupies its delay slot (the space-prefixed lines).
// YUVTORGB is a macro defined elsewhere.
void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
|
||||
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
|
||||
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
|
||||
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
|
||||
"repl.ph $s4, 16 \n" // |0|16|0|16|
|
||||
"repl.ph $s5, 128 \n" // |128|128|
|
||||
"lui $s6, 0xff \n"
|
||||
"ori $s6, 0xff \n" // |00|ff|00|ff|
|
||||
|
||||
"1: \n"
|
||||
YUVTORGB
|
||||
// Arranging into bgra format
|
||||
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
|
||||
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
|
||||
"precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
|
||||
"precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
|
||||
|
||||
"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
|
||||
"addiu %[width], -4 \n"
|
||||
"addiu %[y_buf], 4 \n"
|
||||
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
|
||||
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
|
||||
"sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
|
||||
"sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
|
||||
"or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
|
||||
"or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
|
||||
"precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
|
||||
"precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t2, $t2, 16 \n"
|
||||
"packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
|
||||
"packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
|
||||
// Store results.
|
||||
"sw $t2, 0(%[rgb_buf]) \n"
|
||||
"sw $t0, 4(%[rgb_buf]) \n"
|
||||
"sw $t1, 8(%[rgb_buf]) \n"
|
||||
"sw $t3, 12(%[rgb_buf]) \n"
|
||||
"bnez %[width], 1b \n"
|
||||
" addiu %[rgb_buf], 16 \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
:[y_buf] "+r" (y_buf),
|
||||
[u_buf] "+r" (u_buf),
|
||||
[v_buf] "+r" (v_buf),
|
||||
[width] "+r" (width),
|
||||
[rgb_buf] "+r" (rgb_buf)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3",
|
||||
"s4", "s5", "s6"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
// Bilinear filter 8x2 -> 8x1
|
||||
void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
|
||||
@ -224,37 +224,6 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
// I422 row to BGRA using 32-bit ARM NEON inline assembly.
// Each loop iteration subtracts 8 from width (%4) and stores four 8-byte
// channel registers interleaved with vst4.8, post-incrementing dst_bgra (%3);
// the loop repeats while the remaining width is > 0 (bgt).
// d20 and d22 are swapped before the store and d19 is filled with 255, so
// the bytes written per pixel are: 255, d20, d21, d22 (after the swap).
// NOTE(review): d20/d21/d22 presumably hold B/G/R on exit from YUVTORGB, which
// would make the stored order A,R,G,B - confirm against the macro.
// YUVTORGB_SETUP, READYUV422 and YUVTORGB are macros defined elsewhere and
// consume the named yuvconstants operands.
void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vswp.u8 d20, d22 \n"
|
||||
"vmov.u8 d19, #255 \n"
|
||||
MEMACCESS(3)
|
||||
"vst4.8 {d19, d20, d21, d22}, [%3]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_bgra), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -314,36 +283,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
// I422 row to RAW (3 bytes per pixel) using 32-bit ARM NEON inline assembly.
// Each loop iteration subtracts 8 from width (%4) and stores three 8-byte
// channel registers interleaved with vst3.8, post-incrementing dst_raw (%3);
// the loop repeats while the remaining width is > 0 (bgt).
// d20 and d22 are swapped before the store, so the first and third channel
// produced by YUVTORGB are written in reversed order.
// NOTE(review): d20/d21/d22 presumably hold B/G/R on exit from YUVTORGB, which
// would make the stored order R,G,B - confirm against the macro.
// YUVTORGB_SETUP, READYUV422 and YUVTORGB are macros defined elsewhere and
// consume the named yuvconstants operands.
void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vswp.u8 d20, d22 \n"
|
||||
MEMACCESS(3)
|
||||
"vst3.8 {d20, d21, d22}, [%3]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_raw), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
#define ARGBTORGB565 \
|
||||
"vshr.u8 d20, d20, #3 \n" /* B */ \
|
||||
"vshr.u8 d21, d21, #2 \n" /* G */ \
|
||||
|
||||
@ -233,38 +233,6 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
}
|
||||
#endif // HAS_I411TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_I422TOBGRAROW_NEON
|
||||
// I422 row to BGRA using 64-bit ARM NEON inline assembly.
// Each loop iteration subtracts 8 from width (%w4) and stores four 8-byte
// channel registers interleaved with st4, advancing dst_bgra (%3) by 32
// bytes; the loop repeats while the remaining width is > 0 (b.gt).
// v20 is filled with 255 (alpha) and stored first, followed by the three
// registers named in the YUVTORGB(v21, v22, v23) invocation.
// NOTE(review): the YUVTORGB arguments are presumably (R, G, B) destinations,
// which would make the stored order A,R,G,B - confirm against the macro.
// YUVTORGB_SETUP, READYUV422 and YUVTORGB are macros defined elsewhere and
// consume the named yuvconstants operands.
void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v21, v22, v23)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v20.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_bgra), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
|
||||
}
|
||||
#endif // HAS_I422TOBGRAROW_NEON
|
||||
|
||||
#ifdef HAS_I422TORGBAROW_NEON
|
||||
void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -328,37 +296,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
}
|
||||
#endif // HAS_I422TORGB24ROW_NEON
|
||||
|
||||
#ifdef HAS_I422TORAWROW_NEON
|
||||
// I422 row to RAW (3 bytes per pixel) using 64-bit ARM NEON inline assembly.
// Each loop iteration subtracts 8 from width (%w4) and stores three 8-byte
// channel registers interleaved with st3, advancing dst_raw (%3) by 24
// bytes; the loop repeats while the remaining width is > 0 (b.gt).
// The stored registers are the ones named in YUVTORGB(v20, v21, v22).
// NOTE(review): the YUVTORGB arguments are presumably (R, G, B) destinations,
// which would make the stored order R,G,B - confirm against the macro.
// YUVTORGB_SETUP, READYUV422 and YUVTORGB are macros defined elsewhere and
// consume the named yuvconstants operands.
void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(v20, v21, v22)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
MEMACCESS(3)
|
||||
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_raw), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
|
||||
}
|
||||
#endif // HAS_I422TORAWROW_NEON
|
||||
|
||||
#define ARGBTORGB565 \
|
||||
"shll v0.8h, v22.8b, #8 \n" /* R */ \
|
||||
"shll v20.8h, v20.8b, #8 \n" /* B */ \
|
||||
|
||||
@ -2354,48 +2354,6 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||
// I422 row to BGRA, MSVC inline-assembly AVX2 version (naked function).
// Three registers are pushed on entry, so the stack arguments are read at
// [esp + 12 + n]. edi is rewritten as the V pointer's offset from U, and the
// loop subtracts 16 from ecx (width) per iteration and repeats while the
// result is > 0 (jg). The destination parameter is named dst_argb but is
// written through STOREBGRA_AVX2.
// READYUV422_AVX2, YUVTORGB_AVX2 and STOREBGRA_AVX2 are macros defined
// elsewhere.
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
|
||||
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
|
||||
__declspec(naked)
|
||||
void I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // bgra
|
||||
mov ebx, [esp + 12 + 20] // yuvconstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(ebx)
|
||||
STOREBGRA_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I422TOBGRAROW_AVX2
|
||||
|
||||
#ifdef HAS_I422TORGBAROW_AVX2
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
|
||||
@ -2749,44 +2707,6 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// I422 row to RAW, MSVC inline-assembly SSSE3 version (naked function).
// Three registers are pushed on entry, so the stack arguments are read at
// [esp + 12 + n]. edi is rewritten as the V pointer's offset from U, xmm5 and
// xmm6 are loaded with the two ARGB-to-RAW shuffle masks, and the loop
// subtracts 8 from ecx (width) per iteration and repeats while the result
// is > 0 (jg).
// READYUV422, YUVTORGB and STORERAW are macros defined elsewhere.
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes).
|
||||
__declspec(naked)
|
||||
void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_raw,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // raw
|
||||
mov ebx, [esp + 12 + 20] // yuvconstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
|
||||
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(ebx)
|
||||
STORERAW
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
|
||||
__declspec(naked)
|
||||
@ -3065,75 +2985,6 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
|
||||
}
|
||||
}
|
||||
|
||||
// I422 row to BGRA, MSVC inline-assembly SSSE3 version (naked function).
// Three registers are pushed on entry, so the stack arguments are read at
// [esp + 12 + n]. edi is rewritten as the V pointer's offset from U, and the
// loop subtracts 8 from ecx (width) per iteration and repeats while the
// result is > 0 (jg).
// No alpha register is prepared here, unlike the ABGR variant.
// NOTE(review): presumably STOREBGRA generates alpha itself - confirm against
// the macro. READYUV422, YUVTORGB and STOREBGRA are defined elsewhere.
__declspec(naked)
|
||||
void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_bgra,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // bgra
|
||||
mov ebx, [esp + 12 + 20] // yuvconstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(ebx)
|
||||
STOREBGRA
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// I422 row to ABGR, MSVC inline-assembly SSSE3 version (naked function).
// Three registers are pushed on entry, so the stack arguments are read at
// [esp + 12 + n]. edi is rewritten as the V pointer's offset from U, xmm5 is
// set to all ones for alpha, and the loop subtracts 8 from ecx (width) per
// iteration and repeats while the result is > 0 (jg).
// READYUV422, YUVTORGB and STOREABGR are macros defined elsewhere.
__declspec(naked)
|
||||
void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // abgr
|
||||
mov ebx, [esp + 12 + 20] // yuvconstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(ebx)
|
||||
STOREABGR
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
|
||||
@ -1691,39 +1691,103 @@ TEST_F(LibYUVConvertTest, NAME) { \
|
||||
TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
|
||||
TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
|
||||
|
||||
// Verifies that I444ToABGR followed by an in-place ABGRToARGB channel swap
// yields exactly the same bytes as converting with I444ToARGB directly.
// All buffers are randomized first so stale destination contents cannot
// make the comparison pass by accident.
TEST_F(LibYUVConvertTest, TestI444ToABGRvsARGB) {
  align_buffer_64(src_y, benchmark_width_ * benchmark_height_);
  align_buffer_64(src_u, benchmark_width_ * benchmark_height_);
  align_buffer_64(src_v, benchmark_width_ * benchmark_height_);
  align_buffer_64(dst_argb, benchmark_width_ * benchmark_height_ * 4);
  align_buffer_64(dst_abgr, benchmark_width_ * benchmark_height_ * 4);
  MemRandomize(src_y, benchmark_width_ * benchmark_height_);
  MemRandomize(src_u, benchmark_width_ * benchmark_height_);
  MemRandomize(src_v, benchmark_width_ * benchmark_height_);
  MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4);
  MemRandomize(dst_abgr, benchmark_width_ * benchmark_height_ * 4);
  // Direct conversion (reference).
  libyuv::I444ToARGB(src_y, benchmark_width_,
                     src_u, benchmark_width_,
                     src_v, benchmark_width_,
                     dst_argb, benchmark_width_ * 4,
                     benchmark_width_, benchmark_height_);
  // Conversion to the channel-swapped format.
  libyuv::I444ToABGR(src_y, benchmark_width_,
                     src_u, benchmark_width_,
                     src_v, benchmark_width_,
                     dst_abgr, benchmark_width_ * 4,
                     benchmark_width_, benchmark_height_);
  // swap in place.
  libyuv::ABGRToARGB(dst_abgr, benchmark_width_ * 4,
                     dst_abgr, benchmark_width_ * 4,
                     benchmark_width_, benchmark_height_);
  for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
    EXPECT_EQ(dst_abgr[i], dst_argb[i]);
  }
  free_aligned_buffer_64(src_y);
  free_aligned_buffer_64(src_u);
  free_aligned_buffer_64(src_v);
  free_aligned_buffer_64(dst_argb);
  free_aligned_buffer_64(dst_abgr);
}

||||
// Defines one test that checks end-swapped conversions against direct ones:
// FMT_PLANAR -> FMT_C in one step must produce the same bytes as
// FMT_PLANAR -> FMT_B followed by FMT_B -> FMT_C.
//   FMT_PLANAR           source planar format name (prefix of the convert call)
//   SUBSAMP_X, SUBSAMP_Y chroma subsampling factors used to size the U/V planes
//   FMT_B, BPP_B         intermediate format and its bytes per pixel
//   W1280                test width (values <= 0 are clamped to 1)
//   N                    suffix appended to the test name
//   NEG                  sign applied to the height passed to the planar
//                        conversions (the B -> C step always uses +kHeight)
//   OFF                  byte offset added to every buffer pointer
//   FMT_C, BPP_C         final format and its bytes per pixel
#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
                       W1280, N, NEG, OFF, FMT_C, BPP_C) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
  const int kHeight = benchmark_height_; \
  const int kStrideB = kWidth * BPP_B; \
  const int kSizeUV = \
    SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
  align_buffer_64(src_y, kWidth * kHeight + OFF); \
  align_buffer_64(src_u, kSizeUV + OFF); \
  align_buffer_64(src_v, kSizeUV + OFF); \
  align_buffer_64(dst_argb_b, kStrideB * kHeight + OFF); \
  for (int i = 0; i < kWidth * kHeight; ++i) { \
    src_y[i + OFF] = (fastrand() & 0xff); \
  } \
  for (int i = 0; i < kSizeUV; ++i) { \
    src_u[i + OFF] = (fastrand() & 0xff); \
    src_v[i + OFF] = (fastrand() & 0xff); \
  } \
  memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
  for (int i = 0; i < benchmark_iterations_; ++i) { \
    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
                          src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
                          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
                          dst_argb_b + OFF, kStrideB, \
                          kWidth, NEG kHeight); \
  } \
  /* Convert to a 3rd format in 1 step and 2 steps and compare */ \
  const int kStrideC = kWidth * BPP_C; \
  align_buffer_64(dst_argb_c, kStrideC * kHeight + OFF); \
  align_buffer_64(dst_argb_bc, kStrideC * kHeight + OFF); \
  /* Distinct fill values so an unwritten byte cannot compare equal */ \
  memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
  memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
  FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \
                        src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
                        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
                        dst_argb_c + OFF, kStrideC, \
                        kWidth, NEG kHeight); \
  /* Convert B to C */ \
  FMT_B##To##FMT_C(dst_argb_b + OFF, kStrideB, \
                   dst_argb_bc + OFF, kStrideC, \
                   kWidth, kHeight); \
  for (int i = 0; i < kStrideC * kHeight; ++i) { \
    EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
  } \
  free_aligned_buffer_64(src_y); \
  free_aligned_buffer_64(src_u); \
  free_aligned_buffer_64(src_v); \
  free_aligned_buffer_64(dst_argb_b); \
  free_aligned_buffer_64(dst_argb_c); \
  free_aligned_buffer_64(dst_argb_bc); \
}
|
||||
|
||||
// Expands to four TESTPLANARTOEI tests for one format triple:
//   _Any       width benchmark_width_ - 4, positive height, no offset
//   _Unaligned width benchmark_width_, positive height, buffers offset by 1
//   _Invert    width benchmark_width_, negated height, no offset
//   _Opt       width benchmark_width_, positive height, no offset
#define TESTPLANARTOE(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
FMT_C, BPP_C) \
|
||||
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \
|
||||
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C) \
|
||||
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \
|
||||
TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
|
||||
|
||||
// Instantiations of the 1-step vs 2-step comparison tests.
// Arguments: planar source format, chroma subsampling in x and y,
// intermediate format B and its bytes per pixel, final format C and its
// bytes per pixel. Commented-out entries are combinations that are currently
// disabled.
TESTPLANARTOE(I420, 2, 2, ARGB, 4, ABGR, 4)
|
||||
TESTPLANARTOE(J420, 2, 2, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(J420, 2, 2, ABGR, 4, ARGB, 4)
|
||||
TESTPLANARTOE(H420, 2, 2, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(H420, 2, 2, ABGR, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I420, 2, 2, BGRA, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I420, 2, 2, ABGR, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I420, 2, 2, RGBA, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I420, 2, 2, RGB24, 3, ARGB, 4)
|
||||
// TESTPLANARTOE(I420, 2, 2, RGB24, 3, RAW, 3)
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 4, RAW, 3)
|
||||
TESTPLANARTOE(I420, 2, 2, RAW, 3, ARGB, 4)
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 4, RGB565, 2)
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 4, ARGB1555, 2)
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 4, ARGB4444, 2)
|
||||
TESTPLANARTOE(I422, 2, 1, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(J422, 2, 1, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(J422, 2, 1, ABGR, 4, ARGB, 4)
|
||||
TESTPLANARTOE(H422, 2, 1, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(H422, 2, 1, ABGR, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, BGRA, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, ABGR, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, RGBA, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I411, 4, 1, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I444, 1, 1, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(J444, 1, 1, ARGB, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I444, 1, 1, ABGR, 4, ARGB, 4)
|
||||
// TESTPLANARTOE(I420, 2, 2, YUY2, 2, ARGB, 4)
|
||||
// TESTPLANARTOE(I420, 2, 2, UYVY, 2, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, YUY2, 2, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, UYVY, 2, ARGB, 4)
|
||||
// TESTPLANARTOE(I420, 2, 2, ARGB, 4, I400, 1)
|
||||
// TESTPLANARTOE(J420, 2, 2, ARGB, 4, J400, 1)
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user