From ef67597b4864fd57f6504efd45650d3d065a6afd Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 21 Nov 2014 19:25:14 +0000 Subject: [PATCH] ARGBMirror use SSE2 pshufd instruction instead of SSSE3 pshufb. BUG=269 TESTED=local benchmark for ARGBMirror R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/32509004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1176 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 6 +++--- include/libyuv/version.h | 2 +- source/planar_functions.cc | 8 ++++---- source/rotate_argb.cc | 8 ++++---- source/row_any.cc | 3 --- source/row_posix.cc | 21 ++++++++------------- source/row_win.cc | 14 ++++---------- 8 files changed, 25 insertions(+), 39 deletions(-) diff --git a/README.chromium b/README.chromium index 17a4ee79f..31193046f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1175 +Version: 1176 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 29b364607..471d66a41 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -68,7 +68,7 @@ extern "C" { #define HAS_ARGBCOPYYTOALPHAROW_SSE2 #define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3 -#define HAS_ARGBMIRRORROW_SSSE3 +#define HAS_ARGBMIRRORROW_SSE2 #define HAS_ARGBMULTIPLYROW_SSE2 #define HAS_ARGBPOLYNOMIALROW_SSE2 #define HAS_ARGBQUANTIZEROW_SSE2 @@ -836,11 +836,11 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); +void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void 
ARGBMirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width); +void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 9fb4864c7..dbfcfa9cf 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1175 +#define LIBYUV_VERSION 1176 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index db8699cf3..d43913c6a 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -502,11 +502,11 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb, } } #endif -#if defined(HAS_ARGBMIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBMirrorRow = ARGBMirrorRow_Any_SSSE3; +#if defined(HAS_ARGBMIRRORROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBMirrorRow = ARGBMirrorRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { - ARGBMirrorRow = ARGBMirrorRow_SSSE3; + ARGBMirrorRow = ARGBMirrorRow_SSE2; } } #endif diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index d65ba8c23..d86d0383b 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -109,11 +109,11 @@ void ARGBRotate180(const uint8* src, int src_stride, } } #endif -#if defined(HAS_ARGBMIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBMirrorRow = ARGBMirrorRow_Any_SSSE3; +#if defined(HAS_ARGBMIRRORROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBMirrorRow = ARGBMirrorRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { - ARGBMirrorRow = ARGBMirrorRow_SSSE3; + ARGBMirrorRow = ARGBMirrorRow_SSE2; } } #endif diff --git a/source/row_any.cc b/source/row_any.cc index 54b117567..4dd9b5471 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -646,9 +646,6 @@ 
MANY(MirrorRow_Any_NEON, MirrorRow_NEON, MirrorRow_C, 1, 15) #ifdef HAS_ARGBMIRRORROW_AVX2 MANY(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, ARGBMirrorRow_C, 4, 7) #endif -#ifdef HAS_ARGBMIRRORROW_SSSE3 -MANY(ARGBMirrorRow_Any_SSSE3, ARGBMirrorRow_SSSE3, ARGBMirrorRow_C, 4, 3) -#endif #ifdef HAS_ARGBMIRRORROW_SSE2 MANY(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, ARGBMirrorRow_C, 4, 3) #endif diff --git a/source/row_posix.cc b/source/row_posix.cc index 949a8d120..da908cea2 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2306,21 +2306,16 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, } #endif // HAS_MIRRORROW_UV_SSSE3 -#ifdef HAS_ARGBMIRRORROW_SSSE3 -// Shuffle table for reversing the bytes. -static uvec8 kARGBShuffleMirror = { - 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u -}; +#ifdef HAS_ARGBMIRRORROW_SSE2 -void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { intptr_t temp_width = (intptr_t)(width); asm volatile ( "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n" - "movdqa %3,%%xmm5 \n" LABELALIGN "1: \n" "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "pshufb %%xmm5,%%xmm0 \n" + "pshufd $0x1b,%%xmm0,%%xmm0 \n" "lea " MEMLEA(-0x10,0) ",%0 \n" "movdqu %%xmm0," MEMACCESS(1) " \n" "lea " MEMLEA(0x10,1) ",%1 \n" @@ -2332,11 +2327,11 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { - : "m"(kARGBShuffleMirror) // %3 + : // No input operands: pshufd takes its shuffle order as an immediate. : "memory", "cc" #if defined(__SSE2__) - , "xmm0", "xmm5" + , "xmm0" #endif ); } -#endif // HAS_ARGBMIRRORROW_SSSE3 +#endif // HAS_ARGBMIRRORROW_SSE2 #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes. 
@@ -2351,9 +2346,9 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { "1: \n" VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0 "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" + "lea " MEMLEA(0x20,1) ",%1 \n" + "sub $0x8,%2 \n" // %2 is scaled by 4 above, so it counts pixels: 8 per 32-byte store. + "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 diff --git a/source/row_win.cc b/source/row_win.cc index 3eb16ed3f..3cfea2158 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2507,26 +2507,20 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, } #endif // HAS_MIRRORROW_UV_SSSE3 -#ifdef HAS_ARGBMIRRORROW_SSSE3 -// Shuffle table for reversing the bytes. -static const uvec8 kARGBShuffleMirror = { - 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u -}; - +#ifdef HAS_ARGBMIRRORROW_SSE2 __declspec(naked) __declspec(align(16)) -void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width lea eax, [eax - 16 + ecx * 4] // last 4 pixels. - movdqa xmm5, kARGBShuffleMirror align 4 convertloop: movdqu xmm0, [eax] lea eax, [eax - 16] - pshufb xmm0, xmm5 + pshufd xmm0, xmm0, 0x1b movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 @@ -2534,7 +2528,7 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { ret } } -#endif // HAS_ARGBMIRRORROW_SSSE3 +#endif // HAS_ARGBMIRRORROW_SSE2 #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes.