mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
win64 version of I422AlphaToARGB.
Was: I420AlphaToARGB_Premult (8861 ms), I420AlphaToARGB_Opt (7119 ms).
Now: I420AlphaToABGR_Premult (2840 ms), I420AlphaToARGB_Opt (484 ms).

C function switched to 1 step.
Was: I420AlphaToARGB_Premult (8862 ms), I420AlphaToABGR_Opt (6718 ms).
Now: I420AlphaToARGB_Premult (8706 ms), I420AlphaToARGB_Opt (6541 ms).

R=harryjin@google.com
BUG=libyuv:496, libyuv:473
Review URL: https://codereview.chromium.org/1359183003
This commit is contained in:
parent
9a0e12f5f1
commit
febc26a2c9
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1494
|
||||
Version: 1495
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -187,6 +187,8 @@ extern "C" {
|
||||
(!defined(__clang__) || defined(__SSSE3__))
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1494
|
||||
#define LIBYUV_VERSION 1495
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1353,6 +1353,88 @@ void I422ToARGBRow_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I422AlphaToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = src_a[1];
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
src_a += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
void I422AlphaToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants);
|
||||
rgb_buf[7] = src_a[1];
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
src_a += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToRGB24Row_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1687,32 +1769,6 @@ void I422ToBGRARow_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToRGBARow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -2412,29 +2468,6 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
void I422AlphaToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
|
||||
I422ToARGBRow_C(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow_C(src_a, dst_argb, width);
|
||||
}
|
||||
|
||||
void I422AlphaToABGRRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
I422ToABGRRow_C(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow_C(src_a, dst_abgr, width);
|
||||
}
|
||||
|
||||
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
|
||||
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
|
||||
@ -30,6 +30,17 @@ extern "C" {
|
||||
|
||||
// Read 4 UV from 422, upsample to 8 UV, and read 8 Y.
//
// Expands to plain statements, so the expanding scope must already declare
// xmm0, xmm1, xmm4 (__m128i), the byte pointers u_buf and y_buf, and
// `offset`, the byte distance from u_buf to the V plane.
//   xmm0: 4 U bytes interleaved with 4 V bytes, then each UV pair duplicated
//         so that two neighbouring pixels share one pair.
//   xmm4: 8 Y bytes, each byte duplicated into a 16-bit lane.
// Advances u_buf by 4 and y_buf by 8.
#define READYUV422                                                             \
  xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);                                   \
  xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));                        \
  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                                        \
  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                                       \
  u_buf += 4;                                                                  \
  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);                                     \
  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                                        \
  y_buf += 8;
|
||||
|
||||
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
|
||||
#define READYUVA422 \
|
||||
xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
|
||||
xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
|
||||
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
|
||||
@ -38,6 +49,8 @@ extern "C" {
|
||||
xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
|
||||
xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
|
||||
y_buf += 8; \
|
||||
xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
|
||||
a_buf += 8;
|
||||
|
||||
// Convert 8 pixels: 8 UV and 8 Y.
|
||||
#define YUVTORGB(yuvconstants) \
|
||||
@ -78,9 +91,9 @@ extern "C" {
|
||||
xmm1 = _mm_loadu_si128(&xmm2); \
|
||||
xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \
|
||||
xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \
|
||||
_mm_storeu_si128((__m128i *)dst_argb, xmm2); \
|
||||
_mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
|
||||
dst_argb += 32;
|
||||
_mm_storeu_si128((__m128i *)dst_abgr, xmm2); \
|
||||
_mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \
|
||||
dst_abgr += 32;
|
||||
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
@ -106,7 +119,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__m128i xmm0, xmm1, xmm2, xmm4;
|
||||
@ -120,6 +133,45 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
// Intrinsics version of the planar Y/U/V + alpha to ARGB row conversion.
//
// Every pass of the loop expands READYUVA422, YUVTORGB and STOREARGB and
// then lowers width by 8.  READYUVA422 consumes 8 Y and 8 alpha bytes per
// pass, so a width that is not a multiple of 8 still runs a full final pass
// (NOTE(review): callers presumably pad or route remainders elsewhere -
// confirm).
//
// The local names (xmm0..xmm5, offset and the pointer parameters) are
// referenced by name inside the macros and must not be renamed.
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              const uint8* a_buf,
                              uint8* dst_argb,
                              struct YuvConstants* yuvconstants,
                              int width) {
  // Byte distance from the U plane to the V plane; lets the read macro reach
  // V through u_buf alone.
  const ptrdiff_t offset = v_buf - u_buf;
  __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
  for (; width > 0; width -= 8) {
    READYUVA422
    YUVTORGB(yuvconstants)
    STOREARGB
  }
}
#endif
|
||||
|
||||
#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
// Intrinsics version of the planar Y/U/V + alpha to ABGR row conversion.
//
// Every pass of the loop expands READYUVA422, YUVTORGB and STOREABGR and
// then lowers width by 8.  READYUVA422 consumes 8 Y and 8 alpha bytes per
// pass, so a width that is not a multiple of 8 still runs a full final pass
// (NOTE(review): callers presumably pad or route remainders elsewhere -
// confirm).
//
// The local names (xmm0..xmm5, offset and the pointer parameters) are
// referenced by name inside the macros and must not be renamed.
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              const uint8* a_buf,
                              uint8* dst_abgr,
                              struct YuvConstants* yuvconstants,
                              int width) {
  // Byte distance from the U plane to the V plane; lets the read macro reach
  // V through u_buf alone.
  const ptrdiff_t offset = v_buf - u_buf;
  __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
  for (; width > 0; width -= 8) {
    READYUVA422
    YUVTORGB(yuvconstants)
    STOREABGR
  }
}
#endif
|
||||
|
||||
// 32 bit
|
||||
#else // defined(_M_X64)
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user