mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
clang: use scalewin (the 32-bit Visual C scale module) for clangcl builds
R=harryjin@google.com
TBR=harryjin@google.com
BUG=libyuv:469

Review URL: https://webrtc-codereview.appspot.com/51329004.
This commit is contained in:
parent
cda9d38a4e
commit
94d4269936
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1466
|
||||
Version: 1467
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -23,12 +23,6 @@ extern "C" {
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
#define HAS_TRANSPOSEWX8_SSSE3
|
||||
|
||||
@ -30,6 +30,13 @@ extern "C" {
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
|
||||
// clang >= 3.4.0 required for AVX2.
|
||||
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
||||
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
|
||||
#define CLANG_HAS_AVX2 1
|
||||
#endif // clang >= 3.4
|
||||
#endif // __clang__
|
||||
|
||||
// The following are available on all x86 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||
@ -48,15 +55,16 @@ extern "C" {
|
||||
#define HAS_SCALEROWDOWN4_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on VS2012:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#define HAS_SCALEADDROW_AVX2
|
||||
#define HAS_SCALEROWDOWN2_AVX2
|
||||
#define HAS_SCALEROWDOWN4_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
#define HAS_SCALEADDROW_SSE2
|
||||
#endif
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1466
|
||||
#define LIBYUV_VERSION 1467
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -16,7 +16,8 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for GCC x86 and x64.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
|
||||
// Offsets for source bytes 0 to 9
|
||||
static uvec8 kShuf0 =
|
||||
|
||||
@ -16,9 +16,8 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for Visual C x86.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
defined(_MSC_VER) && !defined(__clang__)
|
||||
// This module is for 32 bit Visual C x86 and clangcl
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
|
||||
// Offsets for source bytes 0 to 9
|
||||
static uvec8 kShuf0 =
|
||||
@ -499,9 +498,9 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
// src_stride ignored
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
movdqa xmm3, kShuf0
|
||||
movdqa xmm4, kShuf1
|
||||
movdqa xmm5, kShuf2
|
||||
movdqa xmm3, xmmword ptr kShuf0
|
||||
movdqa xmm4, xmmword ptr kShuf1
|
||||
movdqa xmm5, xmmword ptr kShuf2
|
||||
|
||||
wloop:
|
||||
movdqu xmm0, [eax]
|
||||
@ -548,12 +547,12 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShuf01
|
||||
movdqa xmm3, kShuf11
|
||||
movdqa xmm4, kShuf21
|
||||
movdqa xmm5, kMadd01
|
||||
movdqa xmm6, kMadd11
|
||||
movdqa xmm7, kRound34
|
||||
movdqa xmm2, xmmword ptr kShuf01
|
||||
movdqa xmm3, xmmword ptr kShuf11
|
||||
movdqa xmm4, xmmword ptr kShuf21
|
||||
movdqa xmm5, xmmword ptr kMadd01
|
||||
movdqa xmm6, xmmword ptr kMadd11
|
||||
movdqa xmm7, xmmword ptr kRound34
|
||||
|
||||
wloop:
|
||||
movdqu xmm0, [eax] // pixels 0..7
|
||||
@ -579,7 +578,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
lea eax, [eax + 32]
|
||||
pavgb xmm0, xmm1
|
||||
pshufb xmm0, xmm4
|
||||
movdqa xmm1, kMadd21
|
||||
movdqa xmm1, xmmword ptr kMadd21
|
||||
pmaddubsw xmm0, xmm1
|
||||
paddsw xmm0, xmm7
|
||||
psrlw xmm0, 2
|
||||
@ -605,12 +604,12 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShuf01
|
||||
movdqa xmm3, kShuf11
|
||||
movdqa xmm4, kShuf21
|
||||
movdqa xmm5, kMadd01
|
||||
movdqa xmm6, kMadd11
|
||||
movdqa xmm7, kRound34
|
||||
movdqa xmm2, xmmword ptr kShuf01
|
||||
movdqa xmm3, xmmword ptr kShuf11
|
||||
movdqa xmm4, xmmword ptr kShuf21
|
||||
movdqa xmm5, xmmword ptr kMadd01
|
||||
movdqa xmm6, xmmword ptr kMadd11
|
||||
movdqa xmm7, xmmword ptr kRound34
|
||||
|
||||
wloop:
|
||||
movdqu xmm0, [eax] // pixels 0..7
|
||||
@ -639,7 +638,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
pavgb xmm1, xmm0
|
||||
pavgb xmm0, xmm1
|
||||
pshufb xmm0, xmm4
|
||||
movdqa xmm1, kMadd21
|
||||
movdqa xmm1, xmmword ptr kMadd21
|
||||
pmaddubsw xmm0, xmm1
|
||||
paddsw xmm0, xmm7
|
||||
psrlw xmm0, 2
|
||||
@ -665,8 +664,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
// src_stride ignored
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
movdqa xmm4, kShuf38a
|
||||
movdqa xmm5, kShuf38b
|
||||
movdqa xmm4, xmmword ptr kShuf38a
|
||||
movdqa xmm5, xmmword ptr kShuf38b
|
||||
|
||||
xloop:
|
||||
movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
|
||||
@ -698,9 +697,9 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShufAc
|
||||
movdqa xmm3, kShufAc3
|
||||
movdqa xmm4, kScaleAc33
|
||||
movdqa xmm2, xmmword ptr kShufAc
|
||||
movdqa xmm3, xmmword ptr kShufAc3
|
||||
movdqa xmm4, xmmword ptr kScaleAc33
|
||||
pxor xmm5, xmm5
|
||||
|
||||
xloop:
|
||||
@ -763,10 +762,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
mov esi, [esp + 4 + 8] // src_stride
|
||||
mov edx, [esp + 4 + 12] // dst_ptr
|
||||
mov ecx, [esp + 4 + 16] // dst_width
|
||||
movdqa xmm2, kShufAb0
|
||||
movdqa xmm3, kShufAb1
|
||||
movdqa xmm4, kShufAb2
|
||||
movdqa xmm5, kScaleAb2
|
||||
movdqa xmm2, xmmword ptr kShufAb0
|
||||
movdqa xmm3, xmmword ptr kShufAb1
|
||||
movdqa xmm4, xmmword ptr kShufAb2
|
||||
movdqa xmm5, xmmword ptr kScaleAb2
|
||||
|
||||
xloop:
|
||||
movdqu xmm0, [eax] // average 2 rows into xmm0
|
||||
@ -1233,8 +1232,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
mov ecx, [esp + 8 + 12] // dst_width
|
||||
movd xmm2, [esp + 8 + 16] // x
|
||||
movd xmm3, [esp + 8 + 20] // dx
|
||||
movdqa xmm4, kShuffleColARGB
|
||||
movdqa xmm5, kShuffleFractions
|
||||
movdqa xmm4, xmmword ptr kShuffleColARGB
|
||||
movdqa xmm5, xmmword ptr kShuffleFractions
|
||||
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
|
||||
psrlw xmm6, 9
|
||||
pextrw eax, xmm2, 1 // get x0 integer. preroll
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user