mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
I420Alpha row function in 1 pass.
API change - I420AlphaToARGB takes a flag indicating whether RGB should be premultiplied by alpha.
This version implements an efficient SSSE3 version for Windows. The C version is done in 2 steps.

Was
libyuvTest.I420AlphaToARGB_Any (1136 ms)
libyuvTest.I420AlphaToARGB_Unaligned (1210 ms)
libyuvTest.I420AlphaToARGB_Invert (966 ms)
libyuvTest.I420AlphaToARGB_Opt (1031 ms)
libyuvTest.I420AlphaToABGR_Any (1020 ms)
libyuvTest.I420AlphaToABGR_Unaligned (1359 ms)
libyuvTest.I420AlphaToABGR_Invert (1082 ms)
libyuvTest.I420AlphaToABGR_Opt (986 ms)

R=harryjin@google.com
BUG=libyuv:496
Review URL: https://codereview.chromium.org/1367093002 .
This commit is contained in:
parent
d4594beefc
commit
e365cdde3b
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1492
|
||||
Version: 1493
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -83,7 +83,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int width, int height, int attenuate);
|
||||
|
||||
// Convert I420 with Alpha to ABGR; output is premultiplied by alpha when
// attenuate is nonzero.
|
||||
LIBYUV_API
|
||||
@ -92,7 +92,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int width, int height, int attenuate);
|
||||
|
||||
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
|
||||
LIBYUV_API
|
||||
|
||||
@ -187,6 +187,14 @@ extern "C" {
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#endif
|
||||
|
||||
|
||||
// The following are available on 32 bit x86 Visual C and clangcl.
|
||||
// TODO(fbarchard): Port to gcc.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_IX86)
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
||||
// TODO(fbarchard): Port to gcc.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
@ -257,6 +265,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// The following are disabled when SSSE3 is available:
|
||||
// TODO(fbarchard): remove sse2. ssse3 is faster and well supported.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
!defined(LIBYUV_SSSE3_ONLY)
|
||||
@ -1045,6 +1054,20 @@ void I422ToARGBRow_C(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1213,6 +1236,20 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1405,6 +1442,20 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1492
|
||||
#define LIBYUV_VERSION 1493
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -336,16 +336,15 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int width, int height, int attenuate) {
|
||||
int y;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
void (*I422AlphaToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
|
||||
ARGBCopyYToAlphaRow_C;
|
||||
int width) = I422AlphaToARGBRow_C;
|
||||
void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
|
||||
int width) = ARGBAttenuateRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb ||
|
||||
@ -358,53 +357,37 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
#if defined(HAS_I422ALPHATOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422ALPHATOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
|
||||
}
|
||||
I422AlphaToARGBRow = I422AlphaToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBATTENUATEROW_SSE2)
|
||||
@ -441,9 +424,10 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow(src_a, dst_argb, width);
|
||||
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, &kYuvConstants, width);
|
||||
if (attenuate) {
|
||||
ARGBAttenuateRow(dst_argb, dst_argb, width);
|
||||
}
|
||||
dst_argb += dst_stride_argb;
|
||||
src_a += src_stride_a;
|
||||
src_y += src_stride_y;
|
||||
@ -454,24 +438,24 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// Convert I420 with Alpha to preattenuated ABGR.
|
||||
|
||||
// Convert I420 with Alpha to ABGR; premultiplies by alpha when attenuate is
// nonzero.
|
||||
LIBYUV_API
|
||||
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
int width, int height, int attenuate) {
|
||||
int y;
|
||||
void (*I422ToABGRRow)(const uint8* y_buf,
|
||||
void (*I422AlphaToABGRRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
|
||||
ARGBCopyYToAlphaRow_C;
|
||||
void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
|
||||
int width) = I422AlphaToABGRRow_C;
|
||||
void (*ARGBAttenuateRow)(const uint8* src_abgr, uint8* dst_abgr,
|
||||
int width) = ARGBAttenuateRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -483,53 +467,37 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
|
||||
dst_stride_abgr = -dst_stride_abgr;
|
||||
}
|
||||
#if defined(HAS_I422TOABGRROW_SSSE3)
|
||||
#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToABGRRow = I422ToABGRRow_SSSE3;
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
#if defined(HAS_I422ALPHATOABGRROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToABGRRow = I422ToABGRRow_AVX2;
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOABGRROW_NEON)
|
||||
#if defined(HAS_I422ALPHATOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToABGRRow = I422ToABGRRow_Any_NEON;
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToABGRRow = I422ToABGRRow_NEON;
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422ALPHATOABGRROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
|
||||
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
|
||||
}
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBATTENUATEROW_SSE2)
|
||||
@ -566,9 +534,10 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow(src_a, dst_abgr, width);
|
||||
I422AlphaToABGRRow(src_y, src_u, src_v, src_a, dst_abgr, &kYuvConstants, width);
|
||||
if (attenuate) {
|
||||
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
|
||||
}
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_a += src_stride_a;
|
||||
src_y += src_stride_y;
|
||||
|
||||
@ -22,6 +22,34 @@ extern "C" {
|
||||
// Subsampled source needs to be increased by 1 if not even.
|
||||
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
|
||||
|
||||
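// Any 4 planes to 1 with yuvconstants.
// Runs ANY_SIMD directly on the first (width & ~MASK) pixels, then copies the
// remaining (width & MASK) pixels of each plane into a zeroed temp buffer,
// converts MASK + 1 pixels there, and copies only the valid output bytes back.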
|
||||
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
const uint8* a_buf, uint8* dst_ptr, \
|
||||
struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 5]); \
|
||||
memset(temp, 0, 64 * 4); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 192, a_buf + n, r); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
|
||||
yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
|
||||
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY41C(I422AlphaToABGRRow_Any_SSSE3, I422AlphaToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#undef ANY41C
|
||||
|
||||
// Any 3 planes to 1.
|
||||
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
@ -50,7 +78,7 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
|
||||
#ifdef HAS_I422TOUYVYROW_NEON
|
||||
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
|
||||
#endif
|
||||
#undef ANY31C
|
||||
#undef ANY31
|
||||
|
||||
// Any 3 planes to 1 with yuvconstants
|
||||
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
|
||||
@ -2412,6 +2412,29 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
void I422AlphaToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
|
||||
I422ToARGBRow_C(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow_C(src_a, dst_argb, width);
|
||||
}
|
||||
|
||||
void I422AlphaToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
I422ToABGRRow_C(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow_C(src_a, dst_abgr, width);
|
||||
}
|
||||
|
||||
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
|
||||
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
|
||||
@ -2416,6 +2416,20 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
||||
__asm lea eax, [eax + 8] \
|
||||
}
|
||||
|
||||
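// Read 8 Y and 4 UV from 422, upsample to 8 UV, and read 8 Alpha.
// Y is loaded from [eax] into xmm4, alpha from [ebp] into xmm5; eax, esi and
// ebp are advanced past the bytes consumed.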
|
||||
#define READYUVA422 __asm { \
|
||||
__asm movd xmm0, [esi] /* U */ \
|
||||
__asm movd xmm1, [esi + edi] /* V */ \
|
||||
__asm lea esi, [esi + 4] \
|
||||
__asm punpcklbw xmm0, xmm1 /* UV */ \
|
||||
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
||||
__asm movq xmm4, qword ptr [eax] /* Y */ \
|
||||
__asm punpcklbw xmm4, xmm4 \
|
||||
__asm lea eax, [eax + 8] \
|
||||
__asm movq xmm5, qword ptr [ebp] /* A */ \
|
||||
__asm lea ebp, [ebp + 8] \
|
||||
}
|
||||
|
||||
// Read 2 UV from 411, upsample to 8 UV.
|
||||
#define READYUV411 __asm { \
|
||||
__asm pinsrw xmm0, [esi], 0 /* U */ \
|
||||
@ -2833,6 +2847,88 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // Y
|
||||
mov esi, [esp + 16 + 8] // U
|
||||
mov edi, [esp + 16 + 12] // V
|
||||
mov ebp, [esp + 16 + 16] // A
|
||||
mov edx, [esp + 16 + 20] // argb
|
||||
mov ebx, [esp + 16 + 24] // yuvconstants
|
||||
mov ecx, [esp + 16 + 28] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUVA422
|
||||
YUVTORGB(ebx)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR (32 bytes).
|
||||
__declspec(naked)
|
||||
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // Y
|
||||
mov esi, [esp + 16 + 8] // U
|
||||
mov edi, [esp + 16 + 12] // V
|
||||
mov ebp, [esp + 16 + 16] // A
|
||||
mov edx, [esp + 16 + 20] // abgr
|
||||
mov ebx, [esp + 16 + 24] // yuvconstants
|
||||
mov ecx, [esp + 16 + 28] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUVA422
|
||||
YUVTORGB(ebx)
|
||||
STOREABGR
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
// Similar to I420 but duplicate UV once more.
|
||||
|
||||
@ -518,7 +518,7 @@ TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4)
|
||||
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4)
|
||||
|
||||
#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, W1280, DIFF, N, NEG, OFF) \
|
||||
YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \
|
||||
TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||
@ -547,7 +547,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
||||
src_a + OFF, kWidth, \
|
||||
dst_argb_c + OFF, kStrideB, \
|
||||
kWidth, NEG kHeight); \
|
||||
kWidth, NEG kHeight, ATTEN); \
|
||||
MaskCpuFlags(-1); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||
@ -555,7 +555,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
||||
src_a + OFF, kWidth, \
|
||||
dst_argb_opt + OFF, kStrideB, \
|
||||
kWidth, NEG kHeight); \
|
||||
kWidth, NEG kHeight, ATTEN); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
|
||||
@ -578,13 +578,15 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, DIFF) \
|
||||
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0) \
|
||||
YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \
|
||||
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1) \
|
||||
YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 0) \
|
||||
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, DIFF, _Invert, -, 0) \
|
||||
YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \
|
||||
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, DIFF, _Opt, +, 0)
|
||||
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0) \
|
||||
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, DIFF, _Premult, +, 0, 1)
|
||||
|
||||
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
|
||||
TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user