mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
Simplify the inline-asm register constraints in the SSSE3 YUV scale-columns code (ScaleFilterCols_SSSE3) for the benefit of the Android Intel build.
BUG=none
TEST=try bots
R=ryanpetrie@google.com
Review URL: https://webrtc-codereview.appspot.com/3989005
git-svn-id: http://libyuv.googlecode.com/svn/trunk@857 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
ff74e023e1
commit
e812e86ea9
10
Makefile
10
Makefile
@ -1,5 +1,5 @@
|
||||
# This is a generic makefile for libyuv for gcc.
|
||||
# Caveat: This file will get overwritten by GYP if projects are generated
|
||||
# Caveat: This file will get overwritten by GYP if projects are generated
|
||||
# with GYP_GENERATORS=make
|
||||
|
||||
CC=g++
|
||||
@ -34,15 +34,15 @@ LOCAL_OBJ_FILES := \
|
||||
.cc.o:
|
||||
$(CC) -c $(CCFLAGS) $*.cc -o $*.o
|
||||
|
||||
all: libyuv.a convert
|
||||
all: libyuv.a convert Makefile
|
||||
|
||||
libyuv.a: $(LOCAL_OBJ_FILES)
|
||||
libyuv.a: $(LOCAL_OBJ_FILES) Makefile
|
||||
$(AR) $(ARFLAGS) -o $@ $(LOCAL_OBJ_FILES)
|
||||
|
||||
# A test utility that uses libyuv conversion.
|
||||
convert: util/convert.cc
|
||||
convert: util/convert.cc Makefile
|
||||
$(CC) $(CCFLAGS) -Iutil/ -o $@ util/convert.cc libyuv.a
|
||||
|
||||
clean:
|
||||
/bin/rm -f *.o libyuv.a convert
|
||||
/bin/rm -f source/*.o libyuv.a convert
|
||||
|
||||
|
||||
@ -145,22 +145,26 @@ extern "C" {
|
||||
#define HAS_YUY2TOYROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms, including NaCL, but
|
||||
// require VS2012, clang, gcc4.7 or NaCL.
|
||||
// Caveat: llvm 3.1 required, but does not provide a version.
|
||||
// AVX2 functions available on all x86 platforms, but not NaCL, and
|
||||
// require VS2012, clang 3.4 or gcc 4.7.
|
||||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
/* Test for GCC >= 4.7.0 */
|
||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
|
||||
#define GCC_HAS_AVX2 1
|
||||
#endif // GNUC >= 4.7
|
||||
#endif // __GNUC__
|
||||
// TODO(fbarchard): Test with new NaCL tool chain. Change __native_client__AVX2
|
||||
// to __native_client__ to test.
|
||||
|
||||
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
||||
/* Test for clang >= 3.4.0 */
|
||||
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
|
||||
#define CLANG_HAS_AVX2 1
|
||||
#endif // clang >= 3.4
|
||||
#endif // __clang__
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || defined(__i386__)) && \
|
||||
((defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700) || \
|
||||
((defined(__x86_64__) || defined(__i386__)) && \
|
||||
(defined(__native_client__AVX2) || defined(__clang__) || \
|
||||
defined(GCC_HAS_AVX2))))
|
||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
// Effects:
|
||||
#define HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
#define HAS_ARGBSHUFFLEROW_AVX2
|
||||
|
||||
@ -1689,12 +1689,12 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
asm volatile (
|
||||
"movd %6,%%xmm2 \n"
|
||||
"movd %7,%%xmm3 \n"
|
||||
"movl $0x04040000,%k5 \n"
|
||||
"movd %k5,%%xmm5 \n"
|
||||
"movl $0x04040000,%k2 \n"
|
||||
"movd %k2,%%xmm5 \n"
|
||||
"pcmpeqb %%xmm6,%%xmm6 \n"
|
||||
"psrlw $0x9,%%xmm6 \n"
|
||||
"pextrw $0x1,%%xmm2,%k3 \n"
|
||||
"subl $0x2,%2 \n"
|
||||
"subl $0x2,%5 \n"
|
||||
"jl 29f \n"
|
||||
"movdqa %%xmm2,%%xmm0 \n"
|
||||
"paddd %%xmm3,%%xmm0 \n"
|
||||
@ -1706,11 +1706,11 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"2: \n"
|
||||
"movdqa %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm3,%%xmm2 \n"
|
||||
"movzwl (%1,%3,1),%k5 \n"
|
||||
"movd %k5,%%xmm0 \n"
|
||||
"movzwl (%1,%3,1),%k2 \n"
|
||||
"movd %k2,%%xmm0 \n"
|
||||
"psrlw $0x9,%%xmm1 \n"
|
||||
"movzwl (%1,%4,1),%k5 \n"
|
||||
"movd %k5,%%xmm4 \n"
|
||||
"movzwl (%1,%4,1),%k2 \n"
|
||||
"movd %k2,%%xmm4 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"punpcklwd %%xmm4,%%xmm0 \n"
|
||||
"pxor %%xmm6,%%xmm1 \n"
|
||||
@ -1719,32 +1719,32 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"pextrw $0x3,%%xmm2,%k4 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%k5 \n"
|
||||
"mov %w5,(%0) \n"
|
||||
"movd %%xmm0,%k2 \n"
|
||||
"mov %w2,(%0) \n"
|
||||
"lea 0x2(%0),%0 \n"
|
||||
"sub $0x2,%2 \n"
|
||||
"sub $0x2,%5 \n"
|
||||
"jge 2b \n"
|
||||
".p2align 2 \n"
|
||||
"29: \n"
|
||||
"addl $0x1,%2 \n"
|
||||
"addl $0x1,%5 \n"
|
||||
"jl 99f \n"
|
||||
"movzwl (%1,%3,1),%k5 \n"
|
||||
"movd %k5,%%xmm0 \n"
|
||||
"movzwl (%1,%3,1),%k2 \n"
|
||||
"movd %k2,%%xmm0 \n"
|
||||
"psrlw $0x9,%%xmm1 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"pxor %%xmm6,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%k5 \n"
|
||||
"mov %b5,(%0) \n"
|
||||
"movd %%xmm0,%k2 \n"
|
||||
"mov %b2,(%0) \n"
|
||||
"99: \n"
|
||||
: "+r"(dst_ptr), // %0
|
||||
"+r"(src_ptr), // %1
|
||||
"+rm"(dst_width), // %2
|
||||
"+a"(x0), // %3
|
||||
"+d"(x1), // %4
|
||||
"+b"(temp_pixel) // %5
|
||||
"+a"(temp_pixel), // %2
|
||||
"+r"(x0), // %3
|
||||
"+r"(x1), // %4
|
||||
"+rm"(dst_width) // %5
|
||||
: "rm"(x), // %6
|
||||
"rm"(dx) // %7
|
||||
: "memory", "cc"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user