diff --git a/README.chromium b/README.chromium
index db900167e..04262982d 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 589
+Version: 590
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index b7cbd6e29..6c202834a 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -137,6 +137,7 @@ extern "C" {
 #define HAS_UYVYTOUV422ROW_AVX2
 #define HAS_UYVYTOUVROW_AVX2
 #define HAS_UYVYTOYROW_AVX2
+#define HAS_HALFROW_AVX2
 
 // Effects
 #define HAS_ARGBATTENUATEROW_AVX2
@@ -1269,6 +1270,8 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride,
                uint8* dst_uv, int pix);
 void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int pix);
+void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix);
 void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int pix);
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index ba05e2442..9566ad398 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 589
+#define LIBYUV_VERSION 590
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert.cc b/source/convert.cc
index e3b024929..8f63fe7a5 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -97,7 +97,14 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
       IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
     HalfRow = HalfRow_SSE2;
   }
-#elif defined(HAS_HALFROW_NEON)
+#endif
+#if defined(HAS_HALFROW_AVX2)
+  // HalfRow_AVX2 executes vzeroupper itself; no cleanup is needed afterwards.
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(halfwidth, 32)) {
+    HalfRow = HalfRow_AVX2;
+  }
+#endif
+#if defined(HAS_HALFROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
     HalfRow = HalfRow_NEON;
   }
diff --git a/source/row_win.cc b/source/row_win.cc
index a05828c5e..ea773c721 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -5531,6 +5531,38 @@ void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
   }
 }
 
+#ifdef HAS_HALFROW_AVX2
+// Averages a row with the row src_uv_stride bytes after it (vpavgb, rounded
+// up) and stores the result in dst_uv, 32 bytes per iteration with unaligned
+// loads and stores. The loop body runs before pix is tested, so pix must be a
+// positive multiple of 32. vzeroupper is issued before returning so callers
+// can freely mix this with SSE2 row functions.
+__declspec(naked) __declspec(align(16))
+void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // src_uv_stride
+    mov        edi, [esp + 4 + 12]   // dst_uv
+    mov        ecx, [esp + 4 + 16]   // pix
+    sub        edi, eax              // edi = dst_uv - src_uv: [eax + edi] is dst
+
+    align      16
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vpavgb     ymm0, ymm0, [eax + edx]
+    sub        ecx, 32
+    vmovdqu    [eax + edi], ymm0
+    lea        eax,  [eax + 32]      // lea keeps the flags set by sub for jg
+    jg         convertloop
+    vzeroupper                       // clear upper YMM state before SSE code
+    pop        edi
+    ret
+  }
+}
+#endif  // HAS_HALFROW_AVX2
+
 __declspec(naked) __declspec(align(16))
 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                           uint32 selector, int pix) {