mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
refactor I420AlphaToABGR to use I420AlphaToARGB internally
swap U and V and transpose conversion matrix, so I420AlphaToARGB and I420AlphaToABGR share low level code. Having less code with same performance allows more focused optimization for future ARM versions. R=harryjin@google.com TBR=harryjin@chromium.org BUG=libyuv:473,libyuv:516 Review URL: https://codereview.chromium.org/1422263002 .
This commit is contained in:
parent
cf160cdbaa
commit
b86dbf24d3
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1525
|
||||
Version: 1526
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -107,7 +107,6 @@ extern "C" {
|
||||
#define HAS_H422TOARGBROW_SSSE3
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
@ -199,7 +198,6 @@ extern "C" {
|
||||
#define HAS_H422TOABGRROW_AVX2
|
||||
#define HAS_H422TOARGBROW_AVX2
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#define HAS_I422ALPHATOABGRROW_AVX2
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#define HAS_I422TOABGRROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
@ -254,7 +252,6 @@ extern "C" {
|
||||
// The following are also available on x64 Visual C.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
|
||||
(!defined(__clang__) || defined(__SSSE3__))
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
@ -1053,13 +1050,6 @@ void I422AlphaToARGBRow_C(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1216,13 +1206,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -1230,13 +1213,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1434,13 +1410,6 @@ void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -1448,13 +1417,6 @@ void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422AlphaToABGRRow_Any_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1525
|
||||
#define LIBYUV_VERSION 1526
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -45,7 +45,6 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
|
||||
// Convert I444 to ARGB.
|
||||
LIBYUV_API
|
||||
static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
@ -129,6 +128,21 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I444 to ABGR.
|
||||
LIBYUV_API
|
||||
int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
return I444ToARGBMatrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_abgr, dst_stride_abgr,
|
||||
&kYvuIConstants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert J444 to ARGB.
|
||||
LIBYUV_API
|
||||
int J444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
@ -144,21 +158,6 @@ int J444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I444 to ABGR.
|
||||
LIBYUV_API
|
||||
int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
return I444ToARGBMatrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v,
|
||||
src_u, src_stride_u,
|
||||
dst_abgr, dst_stride_abgr,
|
||||
&kYvuIConstants,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I422 to ARGB.
|
||||
LIBYUV_API
|
||||
int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
@ -307,13 +306,13 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, int attenuate) {
|
||||
static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height, int attenuate) {
|
||||
int y;
|
||||
void (*I422AlphaToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
@ -393,7 +392,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, &kYuvIConstants,
|
||||
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
|
||||
width);
|
||||
if (attenuate) {
|
||||
ARGBAttenuateRow(dst_argb, dst_argb, width);
|
||||
@ -409,6 +408,23 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, int attenuate) {
|
||||
return I420AlphaToARGBMatrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
src_a, src_stride_a,
|
||||
dst_argb, dst_stride_argb,
|
||||
&kYuvIConstants,
|
||||
width, height, attenuate);
|
||||
}
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
@ -417,99 +433,13 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height, int attenuate) {
|
||||
int y;
|
||||
void (*I422AlphaToABGRRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422AlphaToABGRRow_C;
|
||||
void (*ARGBAttenuateRow)(const uint8* src_abgr, uint8* dst_abgr,
|
||||
int width) = ARGBAttenuateRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
|
||||
dst_stride_abgr = -dst_stride_abgr;
|
||||
}
|
||||
#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422ALPHATOABGRROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422ALPHATOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422ALPHATOABGRROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
|
||||
I422AlphaToABGRRow = I422AlphaToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBATTENUATEROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBATTENUATEROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBAttenuateRow = ARGBAttenuateRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422AlphaToABGRRow(src_y, src_u, src_v, src_a, dst_abgr, &kYuvIConstants,
|
||||
width);
|
||||
if (attenuate) {
|
||||
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
|
||||
}
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_a += src_stride_a;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return I420AlphaToARGBMatrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
src_a, src_stride_a,
|
||||
dst_abgr, dst_stride_abgr,
|
||||
&kYvuIConstants, // Use Yvu matrix
|
||||
width, height, attenuate);
|
||||
}
|
||||
|
||||
// Convert I400 to ARGB.
|
||||
|
||||
@ -46,11 +46,9 @@ extern "C" {
|
||||
|
||||
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
|
||||
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY41C(I422AlphaToABGRRow_Any_SSSE3, I422AlphaToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422ALPHATOARGBROW_AVX2
|
||||
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 7)
|
||||
ANY41C(I422AlphaToABGRRow_Any_AVX2, I422AlphaToABGRRow_AVX2, 1, 0, 4, 7)
|
||||
#endif
|
||||
#undef ANY41C
|
||||
|
||||
|
||||
@ -1413,34 +1413,6 @@ void I422ToABGRRow_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I422AlphaToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants);
|
||||
rgb_buf[7] = src_a[1];
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
src_a += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToRGB24Row_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -1766,38 +1766,6 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUVA422
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREABGR
|
||||
"subl $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[a_buf]"+r"(a_buf), // %[a_buf]
|
||||
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
|
||||
#if defined(__i386__) && defined(__pic__)
|
||||
[width]"+m"(width) // %[width]
|
||||
#else
|
||||
[width]"+rm"(width) // %[width]
|
||||
#endif
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -2229,43 +2197,6 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I422ALPHATOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I422ALPHATOABGRROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
|
||||
void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUVA422_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREABGR_AVX2
|
||||
"subl $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[a_buf]"+r"(a_buf), // %[a_buf]
|
||||
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
|
||||
#if defined(__i386__) && defined(__pic__)
|
||||
[width]"+m"(width) // %[width]
|
||||
#else
|
||||
[width]"+rm"(width) // %[width]
|
||||
#endif
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I422ALPHATOABGRROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
|
||||
|
||||
@ -153,25 +153,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
|
||||
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__m128i xmm0, xmm1, xmm2, xmm4, xmm5;
|
||||
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
||||
while (width > 0) {
|
||||
READYUVA422
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREABGR
|
||||
width -= 8;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// 32 bit
|
||||
#else // defined(_M_X64)
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
@ -2185,49 +2166,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I422ALPHATOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I422ALPHATOABGRROW_AVX2
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
|
||||
__declspec(naked)
|
||||
void I422AlphaToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // Y
|
||||
mov esi, [esp + 16 + 8] // U
|
||||
mov edi, [esp + 16 + 12] // V
|
||||
mov ebp, [esp + 16 + 16] // A
|
||||
mov edx, [esp + 16 + 20] // abgr
|
||||
mov ebx, [esp + 16 + 24] // yuvconstants
|
||||
mov ecx, [esp + 16 + 28] // width
|
||||
sub edi, esi
|
||||
|
||||
convertloop:
|
||||
READYUVA422_AVX2
|
||||
YUVTORGB_AVX2(ebx)
|
||||
STOREABGR_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I422ALPHATOABGRROW_AVX2
|
||||
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
|
||||
@ -3027,46 +2965,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR.
|
||||
__declspec(naked)
|
||||
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
const uint8* a_buf,
|
||||
uint8* dst_abgr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // Y
|
||||
mov esi, [esp + 16 + 8] // U
|
||||
mov edi, [esp + 16 + 12] // V
|
||||
mov ebp, [esp + 16 + 16] // A
|
||||
mov edx, [esp + 16 + 20] // abgr
|
||||
mov ebx, [esp + 16 + 24] // yuvconstants
|
||||
mov ecx, [esp + 16 + 28] // width
|
||||
sub edi, esi
|
||||
|
||||
convertloop:
|
||||
READYUVA422
|
||||
YUVTORGB(ebx)
|
||||
STOREABGR
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
// Similar to I420 but duplicate UV once more.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user