From 35508d09796c8186c5b94992fdbaed79a6314f37 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 13 Nov 2014 23:11:10 +0000 Subject: [PATCH] Mirror_AVX2 ported to GCC. BUG=269 TESTED=try bots R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/32079004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1164 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- include/libyuv/row.h | 4 ++-- source/row_posix.cc | 31 +++++++++++++++++++++++++++++++ source/row_win.cc | 8 ++------ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 941be842c..ffd837030 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -199,6 +199,7 @@ extern "C" { #define HAS_UYVYTOUVROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_MERGEUVROW_AVX2 +#define HAS_MIRRORROW_AVX2 // Effects: #define HAS_ARGBADDROW_AVX2 @@ -206,6 +207,7 @@ extern "C" { #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBATTENUATEROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 +#define HAS_ARGBMIRRORROW_AVX2 #endif // The following are require VS2012. @@ -218,8 +220,6 @@ extern "C" { #define HAS_I422TORGBAROW_AVX2 #define HAS_I422TOABGRROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 -#define HAS_MIRRORROW_AVX2 -#define HAS_ARGBMIRRORROW_AVX2 #endif // defined(VISUALC_HAS_AVX2) // The following are Yasm x86 only: diff --git a/source/row_posix.cc b/source/row_posix.cc index 77047a28c..18927600b 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2207,6 +2207,37 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { } #endif // HAS_MIRRORROW_SSSE3 +#ifdef HAS_MIRRORROW_AVX2 +void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { + intptr_t temp_width = (intptr_t)(width); + asm volatile ( + "vbroadcastf128 %3,%%ymm5 \n" + "lea " MEMLEA(-0x20,0) ",%0 \n" + LABELALIGN + "1: \n" + MEMOPREG(vmovdqu,0x00,0,2,1,ymm0) // vmovdqu (%0,%2),%%ymm0 + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpermq $0x4e,%%ymm0,%%ymm0 \n" + "sub $0x20,%2 \n" + "vmovdqu %%ymm0," MEMACCESS(1) " \n" + "lea " MEMLEA(0x20,1) ",%1 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kShuffleMirror) // %3 + : "memory", "cc" +#if defined(__native_client__) && defined(__x86_64__) + , "r14" +#endif +#if defined(__SSE2__) + , "xmm0", "xmm5" +#endif + ); +} +#endif // HAS_MIRRORROW_AVX2 + #ifdef HAS_MIRRORROW_SSE2 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { intptr_t temp_width = (intptr_t)(width); diff --git a/source/row_win.cc b/source/row_win.cc index d07dc620e..6edc6be77 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2398,6 +2398,7 @@ static const uvec8 kShuffleMirror = { 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u }; +// TODO(fbarchard): Replace lea with -16 offset. __declspec(naked) __declspec(align(16)) void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { __asm { @@ -2421,18 +2422,13 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { #endif // HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_AVX2 -// Shuffle table for reversing the bytes. -static const uvec8 kShuffleMirror_AVX2 = { - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; - __declspec(naked) __declspec(align(16)) void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width - vbroadcastf128 ymm5, kShuffleMirror_AVX2 + vbroadcastf128 ymm5, kShuffleMirror lea eax, [eax - 32] align 4