From 0e4388aea3d075e31dd33f4d76fb0c728e7e0428 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Tue, 17 Mar 2015 17:25:27 +0000 Subject: [PATCH] I422ToRGB24 AVX2 and I422ToRAW BUG=none TESTED=I422ToRGB24 unittest R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/46619004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1337 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 22 ++++++++++++++++++++ include/libyuv/version.h | 2 +- source/convert_from.cc | 16 +++++++++++++++ source/row_any.cc | 6 ++++++ source/row_common.cc | 44 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 90 insertions(+), 2 deletions(-) diff --git a/README.chromium b/README.chromium index 4985117e8..08892be3e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1334 +Version: 1337 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index bbdbd7f38..1a46eef58 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -198,6 +198,8 @@ extern "C" { #define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 +#define HAS_I422TORGB24ROW_AVX2 +#define HAS_I422TORAWROW_AVX2 #define HAS_I444TOARGBROW_AVX2 #define HAS_I411TOARGBROW_AVX2 #define HAS_J400TOARGBROW_AVX2 @@ -1177,11 +1179,21 @@ void I422ToRGB24Row_SSSE3(const uint8* src_y, const uint8* src_v, uint8* dst_rgb24, int width); +void I422ToRGB24Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, + int width); void I422ToRAWRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, int width); +void I422ToRAWRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, + int width); void I422ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1331,11 +1343,21 @@ void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, const uint8* src_v, uint8* dst_argb, int width); +void I422ToRGB24Row_Any_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + int width); void I422ToRAWRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, int width); +void I422ToRAWRow_Any_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + int width); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index eb1c1ffae..7ce1c4134 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1334 +#define LIBYUV_VERSION 1337 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_from.cc b/source/convert_from.cc index d1ee22a42..f78b54569 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -739,6 +739,14 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_AVX2; + } + } +#endif #if defined(HAS_I422TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGB24Row = I422ToRGB24Row_Any_NEON; @@ -791,6 +799,14 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TORAWROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRAWRow = I422ToRAWRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I422ToRAWRow = I422ToRAWRow_AVX2; + } + } +#endif #if defined(HAS_I422TORAWROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRAWRow = I422ToRAWRow_Any_NEON; diff --git a/source/row_any.cc b/source/row_any.cc index 66b7ad9f9..b57a01a8e 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -57,6 +57,12 @@ YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7) YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15) YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15) #endif // HAS_I444TOARGBROW_SSSE3 +#ifdef HAS_I422TORGB24ROW_AVX2 +YANY(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, I422ToRGB24Row_C, 1, 3, 15) +#endif +#ifdef HAS_I422TORAWROW_AVX2 +YANY(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, I422ToRAWRow_C, 1, 3, 15) +#endif #ifdef HAS_J422TOARGBROW_SSSE3 YANY(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, J422ToARGBRow_C, 1, 4, 7) #endif diff --git a/source/row_common.cc b/source/row_common.cc index a65553d4e..669dc66f4 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2370,6 +2370,50 @@ void I422ToARGB4444Row_AVX2(const uint8* src_y, } #endif +#if defined(HAS_I422TORGB24ROW_AVX2) +void I422ToRGB24Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); + // TODO(fbarchard): ARGBToRGB24Row_AVX2 + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TORAWROW_AVX2) +void I422ToRAWRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); + // TODO(fbarchard): ARGBToRAWRow_AVX2 + ARGBToRAWRow_SSSE3(row, dst_raw, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_raw += twidth * 3; + width -= twidth; + } +} +#endif + #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, int width) {