From 94d4269936472587ca201df4287b6c5a22dc85ff Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 18 Aug 2015 14:50:27 -0700 Subject: [PATCH] clang use scalewin R=harryjin@google.com TBR=harryjin@google.com BUG=libyuv:469 Review URL: https://webrtc-codereview.appspot.com/51329004. --- README.chromium | 2 +- include/libyuv/rotate_row.h | 6 ---- include/libyuv/scale_row.h | 14 +++++++-- include/libyuv/version.h | 2 +- source/scale_gcc.cc | 3 +- source/scale_win.cc | 61 ++++++++++++++++++------------------- 6 files changed, 45 insertions(+), 43 deletions(-) diff --git a/README.chromium b/README.chromium index 08dcf5119..915bf54e6 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1466 +Version: 1467 License: BSD License File: LICENSE diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 37ebc7401..020cf2173 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -23,12 +23,6 @@ extern "C" { #define LIBYUV_DISABLE_X86 #endif -// Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 -#define VISUALC_HAS_AVX2 1 -#endif // VisualStudio >= 2012 - // The following are available for Visual C and clangcl 32 bit: #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #define HAS_TRANSPOSEWX8_SSSE3 diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 94ad9cf86..e7594841b 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -30,6 +30,13 @@ extern "C" { #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 +// clang >= 3.4.0 required for AVX2. +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) +#define CLANG_HAS_AVX2 1 +#endif // clang >= 3.4 +#endif // __clang__ + // The following are available on all x86 platforms: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) @@ -48,15 +55,16 @@ extern "C" { #define HAS_SCALEROWDOWN4_SSE2 #endif -// The following are available on VS2012: -#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) +// The following are available for Visual C and clangcl 32 bit: +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ + (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) #define HAS_SCALEADDROW_AVX2 #define HAS_SCALEROWDOWN2_AVX2 #define HAS_SCALEROWDOWN4_AVX2 #endif // The following are available on Visual C: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #define HAS_SCALEADDROW_SSE2 #endif diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 7fec0c728..1b75a69da 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1466 +#define LIBYUV_VERSION 1467 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc index 8a6ac5459..43b68fa0a 100644 --- a/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -16,7 +16,8 @@ extern "C" { #endif // This module is for GCC x86 and x64. -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) // Offsets for source bytes 0 to 9 static uvec8 kShuf0 = diff --git a/source/scale_win.cc b/source/scale_win.cc index c3896ebad..f48a4ee76 100644 --- a/source/scale_win.cc +++ b/source/scale_win.cc @@ -16,9 +16,8 @@ namespace libyuv { extern "C" { #endif -// This module is for Visual C x86. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ - defined(_MSC_VER) && !defined(__clang__) +// This module is for 32 bit Visual C x86 and clangcl +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) // Offsets for source bytes 0 to 9 static uvec8 kShuf0 = @@ -499,9 +498,9 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width - movdqa xmm3, kShuf0 - movdqa xmm4, kShuf1 - movdqa xmm5, kShuf2 + movdqa xmm3, xmmword ptr kShuf0 + movdqa xmm4, xmmword ptr kShuf1 + movdqa xmm5, xmmword ptr kShuf2 wloop: movdqu xmm0, [eax] @@ -548,12 +547,12 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShuf01 - movdqa xmm3, kShuf11 - movdqa xmm4, kShuf21 - movdqa xmm5, kMadd01 - movdqa xmm6, kMadd11 - movdqa xmm7, kRound34 + movdqa xmm2, xmmword ptr kShuf01 + movdqa xmm3, xmmword ptr kShuf11 + movdqa xmm4, xmmword ptr kShuf21 + movdqa xmm5, xmmword ptr kMadd01 + movdqa xmm6, xmmword ptr kMadd11 + movdqa xmm7, xmmword ptr kRound34 wloop: movdqu xmm0, [eax] // pixels 0..7 @@ -579,7 +578,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, lea eax, [eax + 32] pavgb xmm0, xmm1 pshufb xmm0, xmm4 - movdqa xmm1, kMadd21 + movdqa xmm1, xmmword ptr kMadd21 pmaddubsw xmm0, xmm1 paddsw xmm0, xmm7 psrlw xmm0, 2 @@ -605,12 +604,12 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShuf01 - movdqa xmm3, kShuf11 - movdqa xmm4, kShuf21 - movdqa xmm5, kMadd01 - movdqa xmm6, kMadd11 - movdqa xmm7, kRound34 + movdqa xmm2, xmmword ptr kShuf01 + movdqa xmm3, xmmword ptr kShuf11 + movdqa xmm4, xmmword ptr kShuf21 + movdqa xmm5, xmmword ptr kMadd01 + movdqa xmm6, xmmword ptr kMadd11 + movdqa xmm7, xmmword ptr kRound34 wloop: movdqu xmm0, [eax] // pixels 0..7 @@ -639,7 +638,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, pavgb xmm1, xmm0 pavgb xmm0, xmm1 pshufb xmm0, xmm4 - movdqa xmm1, kMadd21 + movdqa xmm1, xmmword ptr kMadd21 pmaddubsw xmm0, xmm1 paddsw xmm0, xmm7 psrlw xmm0, 2 @@ -665,8 +664,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width - movdqa xmm4, kShuf38a - movdqa xmm5, kShuf38b + movdqa xmm4, xmmword ptr kShuf38a + movdqa xmm5, xmmword ptr kShuf38b xloop: movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5 @@ -698,9 +697,9 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShufAc - movdqa xmm3, kShufAc3 - movdqa xmm4, kScaleAc33 + movdqa xmm2, xmmword ptr kShufAc + movdqa xmm3, xmmword ptr kShufAc3 + movdqa xmm4, xmmword ptr kScaleAc33 pxor xmm5, xmm5 xloop: @@ -763,10 +762,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShufAb0 - movdqa xmm3, kShufAb1 - movdqa xmm4, kShufAb2 - movdqa xmm5, kScaleAb2 + movdqa xmm2, xmmword ptr kShufAb0 + movdqa xmm3, xmmword ptr kShufAb1 + movdqa xmm4, xmmword ptr kShufAb2 + movdqa xmm5, xmmword ptr kScaleAb2 xloop: movdqu xmm0, [eax] // average 2 rows into xmm0 @@ -1233,8 +1232,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, mov ecx, [esp + 8 + 12] // dst_width movd xmm2, [esp + 8 + 16] // x movd xmm3, [esp + 8 + 20] // dx - movdqa xmm4, kShuffleColARGB - movdqa xmm5, kShuffleFractions + movdqa xmm4, xmmword ptr kShuffleColARGB + movdqa xmm5, xmmword ptr kShuffleFractions pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. psrlw xmm6, 9 pextrw eax, xmm2, 1 // get x0 integer. preroll