diff --git a/README.chromium b/README.chromium index 477263b0f..576dd582c 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1494 +Version: 1495 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index fcd62b0f5..ea1323c70 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -187,6 +187,8 @@ extern "C" { (!defined(__clang__) || defined(__SSSE3__)) #define HAS_I422TOARGBROW_SSSE3 #define HAS_I422TOABGRROW_SSSE3 +#define HAS_I422ALPHATOARGBROW_SSSE3 +#define HAS_I422ALPHATOABGRROW_SSSE3 #endif // The following are available for AVX2 Visual C and clangcl 32 bit: diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 52305e256..e55cbcf6d 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1494 +#define LIBYUV_VERSION 1495 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index 3c025f1a4..b810849d9 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1353,6 +1353,88 @@ void I422ToARGBRow_C(const uint8* src_y, } } +void I422AlphaToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + const uint8* src_a, + uint8* rgb_buf, + struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel(src_y[0], src_u[0], src_v[0], + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + rgb_buf[3] = src_a[0]; + YuvPixel(src_y[1], src_u[0], src_v[0], + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + rgb_buf[7] = src_a[1]; + src_y += 2; + src_u += 1; + src_v += 1; + src_a += 2; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel(src_y[0], src_u[0], src_v[0], + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + rgb_buf[3] = src_a[0]; + } +} + +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* rgb_buf, + struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel(src_y[0], src_u[0], src_v[0], + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); + rgb_buf[3] = 255; + YuvPixel(src_y[1], src_u[0], src_v[0], + rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants); + rgb_buf[7] = 255; + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel(src_y[0], src_u[0], src_v[0], + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); + rgb_buf[3] = 255; + } +} + +void I422AlphaToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + const uint8* src_a, + uint8* rgb_buf, + struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel(src_y[0], src_u[0], src_v[0], + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); + rgb_buf[3] = src_a[0]; + YuvPixel(src_y[1], src_u[0], src_v[0], + rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants); + rgb_buf[7] = src_a[1]; + src_y += 2; + src_u += 1; + src_v += 1; + src_a += 2; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel(src_y[0], src_u[0], src_v[0], + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); + rgb_buf[3] = src_a[0]; + } +} + void I422ToRGB24Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1687,32 +1769,6 @@ void I422ToBGRARow_C(const uint8* src_y, } } -void I422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - struct YuvConstants* yuvconstants, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); - rgb_buf[3] = 255; - } -} - void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -2412,29 +2468,6 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y, } #endif -void I422AlphaToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, - struct YuvConstants* yuvconstants, - int width) { - - I422ToARGBRow_C(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); - ARGBCopyYToAlphaRow_C(src_a, dst_argb, width); -} - -void I422AlphaToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_abgr, - struct YuvConstants* yuvconstants, - int width) { - I422ToABGRRow_C(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); - ARGBCopyYToAlphaRow_C(src_a, dst_abgr, width); -} - #if defined(HAS_I422TOARGB1555ROW_SSSE3) void I422ToARGB1555Row_SSSE3(const uint8* src_y, const uint8* src_u, diff --git a/source/row_win.cc b/source/row_win.cc index 64c029ce2..e5b27b835 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -30,6 +30,17 @@ extern "C" { // Read 4 UV from 422, upsample to 8 UV. #define READYUV422 \ + xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ + xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ + u_buf += 4; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ + y_buf += 8; + +// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. +#define READYUVA422 \ xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ @@ -38,6 +49,8 @@ extern "C" { xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ y_buf += 8; \ + xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ + a_buf += 8; // Convert 8 pixels: 8 UV and 8 Y. #define YUVTORGB(yuvconstants) \ @@ -78,9 +91,9 @@ extern "C" { xmm1 = _mm_loadu_si128(&xmm2); \ xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ - _mm_storeu_si128((__m128i *)dst_argb, xmm2); \ - _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ - dst_argb += 32; + _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \ + _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \ + dst_abgr += 32; #if defined(HAS_I422TOARGBROW_SSSE3) @@ -106,7 +119,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, void I422ToABGRRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* dst_argb, + uint8* dst_abgr, struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4; @@ -120,6 +133,45 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf, } } #endif + +#if defined(HAS_I422ALPHATOARGBROW_SSSE3) +void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + __m128i xmm0, xmm1, xmm2, xmm4, xmm5; + const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; + while (width > 0) { + READYUVA422 + YUVTORGB(yuvconstants) + STOREARGB + width -= 8; + } +} +#endif + +#if defined(HAS_I422ALPHATOABGRROW_SSSE3) +void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + const uint8* a_buf, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { + __m128i xmm0, xmm1, xmm2, xmm4, xmm5; + const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; + while (width > 0) { + READYUVA422 + YUVTORGB(yuvconstants) + STOREABGR + width -= 8; + } +} +#endif + // 32 bit #else // defined(_M_X64) #ifdef HAS_ARGBTOYROW_SSSE3