mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
clang use scalewin
R=harryjin@google.com
TBR=harryjin@google.com
BUG=libyuv:469
Review URL: https://webrtc-codereview.appspot.com/51329004.
This commit is contained in:
parent
cda9d38a4e
commit
94d4269936
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1466
|
Version: 1467
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -23,12 +23,6 @@ extern "C" {
|
|||||||
#define LIBYUV_DISABLE_X86
|
#define LIBYUV_DISABLE_X86
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Visual C 2012 required for AVX2.
|
|
||||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
|
||||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
|
||||||
#define VISUALC_HAS_AVX2 1
|
|
||||||
#endif // VisualStudio >= 2012
|
|
||||||
|
|
||||||
// The following are available for Visual C and clangcl 32 bit:
|
// The following are available for Visual C and clangcl 32 bit:
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||||
#define HAS_TRANSPOSEWX8_SSSE3
|
#define HAS_TRANSPOSEWX8_SSSE3
|
||||||
|
|||||||
@ -30,6 +30,13 @@ extern "C" {
|
|||||||
#define VISUALC_HAS_AVX2 1
|
#define VISUALC_HAS_AVX2 1
|
||||||
#endif // VisualStudio >= 2012
|
#endif // VisualStudio >= 2012
|
||||||
|
|
||||||
|
// clang >= 3.4.0 required for AVX2.
|
||||||
|
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
||||||
|
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
|
||||||
|
#define CLANG_HAS_AVX2 1
|
||||||
|
#endif // clang >= 3.4
|
||||||
|
#endif // __clang__
|
||||||
|
|
||||||
// The following are available on all x86 platforms:
|
// The following are available on all x86 platforms:
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||||
@ -48,15 +55,16 @@ extern "C" {
|
|||||||
#define HAS_SCALEROWDOWN4_SSE2
|
#define HAS_SCALEROWDOWN4_SSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on VS2012:
|
// The following are available for Visual C and clangcl 32 bit:
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||||
|
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||||
#define HAS_SCALEADDROW_AVX2
|
#define HAS_SCALEADDROW_AVX2
|
||||||
#define HAS_SCALEROWDOWN2_AVX2
|
#define HAS_SCALEROWDOWN2_AVX2
|
||||||
#define HAS_SCALEROWDOWN4_AVX2
|
#define HAS_SCALEROWDOWN4_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on Visual C:
|
// The following are available on Visual C:
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||||
#define HAS_SCALEADDROW_SSE2
|
#define HAS_SCALEADDROW_SSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1466
|
#define LIBYUV_VERSION 1467
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -16,7 +16,8 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// This module is for GCC x86 and x64.
|
// This module is for GCC x86 and x64.
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||||
|
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||||
|
|
||||||
// Offsets for source bytes 0 to 9
|
// Offsets for source bytes 0 to 9
|
||||||
static uvec8 kShuf0 =
|
static uvec8 kShuf0 =
|
||||||
|
|||||||
@ -16,9 +16,8 @@ namespace libyuv {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// This module is for Visual C x86.
|
// This module is for 32 bit Visual C x86 and clangcl
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||||
defined(_MSC_VER) && !defined(__clang__)
|
|
||||||
|
|
||||||
// Offsets for source bytes 0 to 9
|
// Offsets for source bytes 0 to 9
|
||||||
static uvec8 kShuf0 =
|
static uvec8 kShuf0 =
|
||||||
@ -499,9 +498,9 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
// src_stride ignored
|
// src_stride ignored
|
||||||
mov edx, [esp + 12] // dst_ptr
|
mov edx, [esp + 12] // dst_ptr
|
||||||
mov ecx, [esp + 16] // dst_width
|
mov ecx, [esp + 16] // dst_width
|
||||||
movdqa xmm3, kShuf0
|
movdqa xmm3, xmmword ptr kShuf0
|
||||||
movdqa xmm4, kShuf1
|
movdqa xmm4, xmmword ptr kShuf1
|
||||||
movdqa xmm5, kShuf2
|
movdqa xmm5, xmmword ptr kShuf2
|
||||||
|
|
||||||
wloop:
|
wloop:
|
||||||
movdqu xmm0, [eax]
|
movdqu xmm0, [eax]
|
||||||
@ -548,12 +547,12 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
|||||||
mov esi, [esp + 4 + 8] // src_stride
|
mov esi, [esp + 4 + 8] // src_stride
|
||||||
mov edx, [esp + 4 + 12] // dst_ptr
|
mov edx, [esp + 4 + 12] // dst_ptr
|
||||||
mov ecx, [esp + 4 + 16] // dst_width
|
mov ecx, [esp + 4 + 16] // dst_width
|
||||||
movdqa xmm2, kShuf01
|
movdqa xmm2, xmmword ptr kShuf01
|
||||||
movdqa xmm3, kShuf11
|
movdqa xmm3, xmmword ptr kShuf11
|
||||||
movdqa xmm4, kShuf21
|
movdqa xmm4, xmmword ptr kShuf21
|
||||||
movdqa xmm5, kMadd01
|
movdqa xmm5, xmmword ptr kMadd01
|
||||||
movdqa xmm6, kMadd11
|
movdqa xmm6, xmmword ptr kMadd11
|
||||||
movdqa xmm7, kRound34
|
movdqa xmm7, xmmword ptr kRound34
|
||||||
|
|
||||||
wloop:
|
wloop:
|
||||||
movdqu xmm0, [eax] // pixels 0..7
|
movdqu xmm0, [eax] // pixels 0..7
|
||||||
@ -579,7 +578,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
|||||||
lea eax, [eax + 32]
|
lea eax, [eax + 32]
|
||||||
pavgb xmm0, xmm1
|
pavgb xmm0, xmm1
|
||||||
pshufb xmm0, xmm4
|
pshufb xmm0, xmm4
|
||||||
movdqa xmm1, kMadd21
|
movdqa xmm1, xmmword ptr kMadd21
|
||||||
pmaddubsw xmm0, xmm1
|
pmaddubsw xmm0, xmm1
|
||||||
paddsw xmm0, xmm7
|
paddsw xmm0, xmm7
|
||||||
psrlw xmm0, 2
|
psrlw xmm0, 2
|
||||||
@ -605,12 +604,12 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
|||||||
mov esi, [esp + 4 + 8] // src_stride
|
mov esi, [esp + 4 + 8] // src_stride
|
||||||
mov edx, [esp + 4 + 12] // dst_ptr
|
mov edx, [esp + 4 + 12] // dst_ptr
|
||||||
mov ecx, [esp + 4 + 16] // dst_width
|
mov ecx, [esp + 4 + 16] // dst_width
|
||||||
movdqa xmm2, kShuf01
|
movdqa xmm2, xmmword ptr kShuf01
|
||||||
movdqa xmm3, kShuf11
|
movdqa xmm3, xmmword ptr kShuf11
|
||||||
movdqa xmm4, kShuf21
|
movdqa xmm4, xmmword ptr kShuf21
|
||||||
movdqa xmm5, kMadd01
|
movdqa xmm5, xmmword ptr kMadd01
|
||||||
movdqa xmm6, kMadd11
|
movdqa xmm6, xmmword ptr kMadd11
|
||||||
movdqa xmm7, kRound34
|
movdqa xmm7, xmmword ptr kRound34
|
||||||
|
|
||||||
wloop:
|
wloop:
|
||||||
movdqu xmm0, [eax] // pixels 0..7
|
movdqu xmm0, [eax] // pixels 0..7
|
||||||
@ -639,7 +638,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
|||||||
pavgb xmm1, xmm0
|
pavgb xmm1, xmm0
|
||||||
pavgb xmm0, xmm1
|
pavgb xmm0, xmm1
|
||||||
pshufb xmm0, xmm4
|
pshufb xmm0, xmm4
|
||||||
movdqa xmm1, kMadd21
|
movdqa xmm1, xmmword ptr kMadd21
|
||||||
pmaddubsw xmm0, xmm1
|
pmaddubsw xmm0, xmm1
|
||||||
paddsw xmm0, xmm7
|
paddsw xmm0, xmm7
|
||||||
psrlw xmm0, 2
|
psrlw xmm0, 2
|
||||||
@ -665,8 +664,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
// src_stride ignored
|
// src_stride ignored
|
||||||
mov edx, [esp + 12] // dst_ptr
|
mov edx, [esp + 12] // dst_ptr
|
||||||
mov ecx, [esp + 16] // dst_width
|
mov ecx, [esp + 16] // dst_width
|
||||||
movdqa xmm4, kShuf38a
|
movdqa xmm4, xmmword ptr kShuf38a
|
||||||
movdqa xmm5, kShuf38b
|
movdqa xmm5, xmmword ptr kShuf38b
|
||||||
|
|
||||||
xloop:
|
xloop:
|
||||||
movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
|
movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
|
||||||
@ -698,9 +697,9 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
|||||||
mov esi, [esp + 4 + 8] // src_stride
|
mov esi, [esp + 4 + 8] // src_stride
|
||||||
mov edx, [esp + 4 + 12] // dst_ptr
|
mov edx, [esp + 4 + 12] // dst_ptr
|
||||||
mov ecx, [esp + 4 + 16] // dst_width
|
mov ecx, [esp + 4 + 16] // dst_width
|
||||||
movdqa xmm2, kShufAc
|
movdqa xmm2, xmmword ptr kShufAc
|
||||||
movdqa xmm3, kShufAc3
|
movdqa xmm3, xmmword ptr kShufAc3
|
||||||
movdqa xmm4, kScaleAc33
|
movdqa xmm4, xmmword ptr kScaleAc33
|
||||||
pxor xmm5, xmm5
|
pxor xmm5, xmm5
|
||||||
|
|
||||||
xloop:
|
xloop:
|
||||||
@ -763,10 +762,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
|||||||
mov esi, [esp + 4 + 8] // src_stride
|
mov esi, [esp + 4 + 8] // src_stride
|
||||||
mov edx, [esp + 4 + 12] // dst_ptr
|
mov edx, [esp + 4 + 12] // dst_ptr
|
||||||
mov ecx, [esp + 4 + 16] // dst_width
|
mov ecx, [esp + 4 + 16] // dst_width
|
||||||
movdqa xmm2, kShufAb0
|
movdqa xmm2, xmmword ptr kShufAb0
|
||||||
movdqa xmm3, kShufAb1
|
movdqa xmm3, xmmword ptr kShufAb1
|
||||||
movdqa xmm4, kShufAb2
|
movdqa xmm4, xmmword ptr kShufAb2
|
||||||
movdqa xmm5, kScaleAb2
|
movdqa xmm5, xmmword ptr kScaleAb2
|
||||||
|
|
||||||
xloop:
|
xloop:
|
||||||
movdqu xmm0, [eax] // average 2 rows into xmm0
|
movdqu xmm0, [eax] // average 2 rows into xmm0
|
||||||
@ -1233,8 +1232,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
|||||||
mov ecx, [esp + 8 + 12] // dst_width
|
mov ecx, [esp + 8 + 12] // dst_width
|
||||||
movd xmm2, [esp + 8 + 16] // x
|
movd xmm2, [esp + 8 + 16] // x
|
||||||
movd xmm3, [esp + 8 + 20] // dx
|
movd xmm3, [esp + 8 + 20] // dx
|
||||||
movdqa xmm4, kShuffleColARGB
|
movdqa xmm4, xmmword ptr kShuffleColARGB
|
||||||
movdqa xmm5, kShuffleFractions
|
movdqa xmm5, xmmword ptr kShuffleFractions
|
||||||
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
|
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
|
||||||
psrlw xmm6, 9
|
psrlw xmm6, 9
|
||||||
pextrw eax, xmm2, 1 // get x0 integer. preroll
|
pextrw eax, xmm2, 1 // get x0 integer. preroll
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user