mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Enable AlphaBlend SSSE3 code for NaCL
BUG=253
TESTED=validator
R=nfullagar@chromium.org, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/1968004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@744 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f2aa91a1ac
commit
f8a86cb095
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 743
|
||||
Version: 744
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -27,8 +27,7 @@ extern "C" {
|
||||
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
|
||||
|
||||
#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
|
||||
defined(TARGET_IPHONE_SIMULATOR) || \
|
||||
(defined(__native_client__) && defined(__x86_64__))
|
||||
defined(TARGET_IPHONE_SIMULATOR)
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// True if compiling for SSSE3 as a requirement.
|
||||
@ -36,9 +35,17 @@ extern "C" {
|
||||
#define LIBYUV_SSSE3_ONLY
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms:
|
||||
// The following are available on all x86 platforms, including NaCL:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||
#define HAS_ARGBBLENDROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms except NaCL x64:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
!(defined(__native_client__) && defined(__x86_64__))
|
||||
|
||||
// Conversions.
|
||||
#define HAS_ABGRTOUVROW_SSSE3
|
||||
#define HAS_ABGRTOYROW_SSSE3
|
||||
@ -108,7 +115,6 @@ extern "C" {
|
||||
#define HAS_ARGBADDROW_SSE2
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||
#define HAS_ARGBBLENDROW_SSSE3
|
||||
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||
#define HAS_ARGBGRAYROW_SSSE3
|
||||
#define HAS_ARGBMIRRORROW_SSSE3
|
||||
@ -180,10 +186,13 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
!defined(LIBYUV_SSSE3_ONLY)
|
||||
#define HAS_ARGBATTENUATEROW_SSE2
|
||||
// Available with NaCL:
|
||||
#define HAS_ARGBBLENDROW_SSE2
|
||||
#if !(defined(__native_client__) && defined(__x86_64__))
|
||||
#define HAS_ARGBATTENUATEROW_SSE2
|
||||
#define HAS_MIRRORROW_SSE2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 743
|
||||
#define LIBYUV_VERSION 744
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -20,6 +20,14 @@ extern "C" {
|
||||
// This module is for GCC x86 and x64
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||
|
||||
#if defined(__native_client__) && defined(__x86_64__)
|
||||
#define MEMACCESS(x) "%%nacl:(%%r15,%q" #x ")"
|
||||
#define MEMLEA(x, y) #x "(%q" #y ")"
|
||||
#else
|
||||
#define MEMACCESS(x) "(%" #x ")"
|
||||
#define MEMLEA(x, y) #x "(%" #y ")"
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
// Constants for ARGB
|
||||
@ -3494,19 +3502,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"10: \n"
|
||||
"test $0xf,%2 \n"
|
||||
"je 19f \n"
|
||||
"movd (%0),%%xmm3 \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"movd "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x4,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movd (%1),%%xmm2 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm3 \n"
|
||||
"pshufhw $0xf5,%%xmm3,%%xmm3 \n"
|
||||
"pshuflw $0xf5,%%xmm3,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movd (%1),%%xmm1 \n"
|
||||
"lea 0x4(%1),%1 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x4,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3515,8 +3523,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x1,%3 \n"
|
||||
"movd %%xmm0,(%2) \n"
|
||||
"lea 0x4(%2),%2 \n"
|
||||
"movd %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x4,2)",%2 \n"
|
||||
"jge 10b \n"
|
||||
|
||||
"19: \n"
|
||||
@ -3526,19 +3534,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
// 4 pixel loop.
|
||||
".p2align 2 \n"
|
||||
"41: \n"
|
||||
"movdqu (%0),%%xmm3 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movdqu (%1),%%xmm2 \n"
|
||||
"movdqu "MEMACCESS(1)",%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm3 \n"
|
||||
"pshufhw $0xf5,%%xmm3,%%xmm3 \n"
|
||||
"pshuflw $0xf5,%%xmm3,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movdqu (%1),%%xmm1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"movdqu "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3547,8 +3555,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"movdqa %%xmm0,(%2) \n"
|
||||
"lea 0x10(%2),%2 \n"
|
||||
"movdqa %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x10,2)",%2 \n"
|
||||
"jge 41b \n"
|
||||
|
||||
"49: \n"
|
||||
@ -3557,19 +3565,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
|
||||
// 1 pixel loop.
|
||||
"91: \n"
|
||||
"movd (%0),%%xmm3 \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"movd "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x4,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movd (%1),%%xmm2 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm3 \n"
|
||||
"pshufhw $0xf5,%%xmm3,%%xmm3 \n"
|
||||
"pshuflw $0xf5,%%xmm3,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movd (%1),%%xmm1 \n"
|
||||
"lea 0x4(%1),%1 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x4,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3578,8 +3586,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x1,%3 \n"
|
||||
"movd %%xmm0,(%2) \n"
|
||||
"lea 0x4(%2),%2 \n"
|
||||
"movd %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x4,2)",%2 \n"
|
||||
"jge 91b \n"
|
||||
"99: \n"
|
||||
: "+r"(src_argb0), // %0
|
||||
@ -3631,17 +3639,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"10: \n"
|
||||
"test $0xf,%2 \n"
|
||||
"je 19f \n"
|
||||
"movd (%0),%%xmm3 \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"movd "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x4,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movd (%1),%%xmm2 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm2 \n"
|
||||
"pshufb %4,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movd (%1),%%xmm1 \n"
|
||||
"lea 0x4(%1),%1 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x4,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3650,8 +3658,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x1,%3 \n"
|
||||
"movd %%xmm0,(%2) \n"
|
||||
"lea 0x4(%2),%2 \n"
|
||||
"movd %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x4,2)",%2 \n"
|
||||
"jge 10b \n"
|
||||
|
||||
"19: \n"
|
||||
@ -3665,17 +3673,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
// 4 pixel loop.
|
||||
".p2align 2 \n"
|
||||
"40: \n"
|
||||
"movdqa (%0),%%xmm3 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqa "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movdqa (%1),%%xmm2 \n"
|
||||
"movdqa "MEMACCESS(1)",%%xmm2 \n"
|
||||
"pshufb %4,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movdqa (%1),%%xmm1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"movdqa "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3684,25 +3692,25 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"movdqa %%xmm0,(%2) \n"
|
||||
"lea 0x10(%2),%2 \n"
|
||||
"movdqa %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x10,2)",%2 \n"
|
||||
"jge 40b \n"
|
||||
"jmp 49f \n"
|
||||
|
||||
// 4 pixel unaligned loop.
|
||||
".p2align 2 \n"
|
||||
"41: \n"
|
||||
"movdqu (%0),%%xmm3 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqu "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x10,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movdqu (%1),%%xmm2 \n"
|
||||
"movdqu "MEMACCESS(1)",%%xmm2 \n"
|
||||
"pshufb %4,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movdqu (%1),%%xmm1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"movdqu "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x10,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3711,8 +3719,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"movdqa %%xmm0,(%2) \n"
|
||||
"lea 0x10(%2),%2 \n"
|
||||
"movdqa %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x10,2)",%2 \n"
|
||||
"jge 41b \n"
|
||||
|
||||
"49: \n"
|
||||
@ -3721,17 +3729,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
|
||||
// 1 pixel loop.
|
||||
"91: \n"
|
||||
"movd (%0),%%xmm3 \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"movd "MEMACCESS(0)",%%xmm3 \n"
|
||||
"lea "MEMLEA(0x4,0)",%0 \n"
|
||||
"movdqa %%xmm3,%%xmm0 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"movd (%1),%%xmm2 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm2 \n"
|
||||
"pshufb %4,%%xmm3 \n"
|
||||
"pand %%xmm6,%%xmm2 \n"
|
||||
"paddw %%xmm7,%%xmm3 \n"
|
||||
"pmullw %%xmm3,%%xmm2 \n"
|
||||
"movd (%1),%%xmm1 \n"
|
||||
"lea 0x4(%1),%1 \n"
|
||||
"movd "MEMACCESS(1)",%%xmm1 \n"
|
||||
"lea "MEMLEA(0x4,1)",%1 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"pmullw %%xmm3,%%xmm1 \n"
|
||||
@ -3740,8 +3748,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x1,%3 \n"
|
||||
"movd %%xmm0,(%2) \n"
|
||||
"lea 0x4(%2),%2 \n"
|
||||
"movd %%xmm0,"MEMACCESS(2)" \n"
|
||||
"lea "MEMLEA(0x4,2)",%2 \n"
|
||||
"jge 91b \n"
|
||||
"99: \n"
|
||||
: "+r"(src_argb0), // %0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user