From 51398e0be5004b8818df4a4ccda9fe77bcfaf141 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 6 Mar 2013 00:57:48 +0000 Subject: [PATCH] ARGBMirror AVX2 BUG=none TEST=out\release\libyuv_unittest --gtest_filter=*ARGBMirror* Review URL: https://webrtc-codereview.appspot.com/1159005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@594 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 2 ++ include/libyuv/version.h | 2 +- source/convert_from.cc | 32 +++++++++++++++++++------------- source/planar_functions.cc | 16 +++++++++++++++- source/rotate_argb.cc | 15 ++++++++++++++- source/row_win.cc | 27 +++++++++++++++++++++++++++ 7 files changed, 79 insertions(+), 17 deletions(-) diff --git a/README.chromium b/README.chromium index 04262982d..8f1dccf34 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 590 +Version: 594 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index d8bda6c77..119c2aa22 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -139,6 +139,7 @@ extern "C" { #define HAS_UYVYTOYROW_AVX2 #define HAS_HALFROW_AVX2 #define HAS_MIRRORROW_AVX2 +#define HAS_ARGBMIRRORROW_AVX2 // Effects #define HAS_ARGBATTENUATEROW_AVX2 @@ -574,6 +575,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); +void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 9566ad398..a3c224299 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define 
INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 590 +#define LIBYUV_VERSION 594 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_from.cc b/source/convert_from.cc index 6949f5933..c3b05f607 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -50,23 +50,29 @@ int I420ToI422(const uint8* src_y, int src_stride_y, } int halfwidth = (width + 1) >> 1; void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; +#if defined(HAS_COPYROW_X86) + if (IS_ALIGNED(halfwidth, 4)) { + CopyRow = CopyRow_X86; + } +#endif +#if defined(HAS_COPYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 32) && + IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && + IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + CopyRow = CopyRow_SSE2; + } +#endif +#if defined(HAS_COPYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + CopyRow = CopyRow_AVX2; + } +#endif #if defined(HAS_COPYROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 32)) { CopyRow = CopyRow_NEON; } -#elif defined(HAS_COPYROW_X86) - if (IS_ALIGNED(halfwidth, 4)) { - CopyRow = CopyRow_X86; -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 32) && - IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && - IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif - } #endif #if defined(HAS_COPYROW_MIPS) if (TestCpuFlag(kCpuHasMIPS)) { diff --git a/source/planar_functions.cc b/source/planar_functions.cc index ef6f4edb8..0b5f7823b 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -448,7 +448,15 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb, IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 
ARGBMirrorRow = ARGBMirrorRow_SSSE3; } -#elif defined(HAS_ARGBMIRRORROW_NEON) +#endif +#if defined(HAS_ARGBMIRRORROW_AVX2) + bool clear = false; + if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { + clear = true; + ARGBMirrorRow = ARGBMirrorRow_AVX2; + } +#endif +#if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { ARGBMirrorRow = ARGBMirrorRow_NEON; } @@ -460,6 +468,12 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb, src_argb += src_stride_argb; dst_argb += dst_stride_argb; } + +#if defined(HAS_ARGBMIRRORROW_AVX2) + if (clear) { + __asm vzeroupper; + } +#endif return 0; } diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 20406f5d5..cccfb9b48 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -99,7 +99,15 @@ void ARGBRotate180(const uint8* src, int src_stride, IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { ARGBMirrorRow = ARGBMirrorRow_SSSE3; } -#elif defined(HAS_ARGBMIRRORROW_NEON) +#endif +#if defined(HAS_ARGBMIRRORROW_AVX2) + bool clear = false; + if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { + clear = true; + ARGBMirrorRow = ARGBMirrorRow_AVX2; + } +#endif +#if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { ARGBMirrorRow = ARGBMirrorRow_NEON; } @@ -151,6 +159,11 @@ void ARGBRotate180(const uint8* src, int src_stride, src_bot -= src_stride; dst_bot -= dst_stride; } +#if defined(HAS_ARGBMIRRORROW_AVX2) + if (clear) { + __asm vzeroupper; + } +#endif } LIBYUV_API diff --git a/source/row_win.cc b/source/row_win.cc index 272ac7320..07f1f035c 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3056,6 +3056,33 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { } #endif // HAS_ARGBMIRRORROW_SSSE3 +#ifdef HAS_ARGBMIRRORROW_AVX2 +// Shuffle table for reversing the order of the 8 dwords (ARGB pixels). 
+static const ulvec32 kARGBShuffleMirror_AVX2 = { + 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u +}; + +__declspec(naked) __declspec(align(16)) +void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { + __asm { + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width + lea eax, [eax - 32] + vmovdqa ymm5, kARGBShuffleMirror_AVX2 + + align 16 + convertloop: + vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order + sub ecx, 8 + vmovdqu [edx], ymm0 + lea edx, [edx + 32] + jg convertloop + ret + } +} +#endif // HAS_ARGBMIRRORROW_AVX2 + #ifdef HAS_SPLITUVROW_SSE2 __declspec(naked) __declspec(align(16)) void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {