diff --git a/README.chromium b/README.chromium index 6875a0923..511f9bae0 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 743 +Version: 744 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 12a3e8188..dcb11c6dd 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -27,8 +27,7 @@ extern "C" { #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) #if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ - defined(TARGET_IPHONE_SIMULATOR) || \ - (defined(__native_client__) && defined(__x86_64__)) + defined(TARGET_IPHONE_SIMULATOR) #define LIBYUV_DISABLE_X86 #endif // True if compiling for SSSE3 as a requirement. @@ -36,9 +35,17 @@ extern "C" { #define LIBYUV_SSSE3_ONLY #endif -// The following are available on all x86 platforms: +// The following are available on all x86 platforms, including NaCL: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) +#define HAS_ARGBBLENDROW_SSSE3 +#endif + +// The following are available on all x86 platforms except NaCL x64: +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ + !(defined(__native_client__) && defined(__x86_64__)) + // Conversions. #define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 @@ -108,7 +115,6 @@ extern "C" { #define HAS_ARGBADDROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2 #define HAS_ARGBATTENUATEROW_SSSE3 -#define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBMIRRORROW_SSSE3 @@ -180,10 +186,13 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ !defined(LIBYUV_SSSE3_ONLY) -#define HAS_ARGBATTENUATEROW_SSE2 +// Available with NaCL: #define HAS_ARGBBLENDROW_SSE2 +#if !(defined(__native_client__) && defined(__x86_64__)) +#define HAS_ARGBATTENUATEROW_SSE2 #define HAS_MIRRORROW_SSE2 #endif +#endif // The following are available on Neon platforms #if !defined(LIBYUV_DISABLE_NEON) && \ diff --git a/include/libyuv/version.h b/include/libyuv/version.h index afd4bc688..178f82217 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 743 +#define LIBYUV_VERSION 744 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index db255a2a4..1c05913bc 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -20,6 +20,14 @@ extern "C" { // This module is for GCC x86 and x64 #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#if defined(__native_client__) && defined(__x86_64__) +#define MEMACCESS(x) "%%nacl:(%%r15,%q" #x ")" +#define MEMLEA(x, y) #x "(%q" #y ")" +#else +#define MEMACCESS(x) "(%" #x ")" +#define MEMLEA(x, y) #x "(%" #y ")" +#endif + #ifdef HAS_ARGBTOYROW_SSSE3 // Constants for ARGB @@ -3494,19 +3502,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, "10: \n" "test $0xf,%2 \n" "je 19f \n" - "movd (%0),%%xmm3 \n" - "lea 0x4(%0),%0 \n" + "movd "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x4,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movd (%1),%%xmm2 \n" + "movd "MEMACCESS(1)",%%xmm2 \n" "psrlw $0x8,%%xmm3 \n" "pshufhw $0xf5,%%xmm3,%%xmm3 \n" "pshuflw $0xf5,%%xmm3,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movd (%1),%%xmm1 \n" - "lea 0x4(%1),%1 \n" + "movd "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x4,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3515,8 +3523,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x1,%3 \n" - "movd %%xmm0,(%2) \n" - "lea 0x4(%2),%2 \n" + "movd %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x4,2)",%2 \n" "jge 10b \n" "19: \n" @@ -3526,19 +3534,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, // 4 pixel loop. ".p2align 2 \n" "41: \n" - "movdqu (%0),%%xmm3 \n" - "lea 0x10(%0),%0 \n" + "movdqu "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x10,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movdqu (%1),%%xmm2 \n" + "movdqu "MEMACCESS(1)",%%xmm2 \n" "psrlw $0x8,%%xmm3 \n" "pshufhw $0xf5,%%xmm3,%%xmm3 \n" "pshuflw $0xf5,%%xmm3,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movdqu (%1),%%xmm1 \n" - "lea 0x10(%1),%1 \n" + "movdqu "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x10,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3547,8 +3555,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x4,%3 \n" - "movdqa %%xmm0,(%2) \n" - "lea 0x10(%2),%2 \n" + "movdqa %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x10,2)",%2 \n" "jge 41b \n" "49: \n" @@ -3557,19 +3565,19 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, // 1 pixel loop. "91: \n" - "movd (%0),%%xmm3 \n" - "lea 0x4(%0),%0 \n" + "movd "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x4,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movd (%1),%%xmm2 \n" + "movd "MEMACCESS(1)",%%xmm2 \n" "psrlw $0x8,%%xmm3 \n" "pshufhw $0xf5,%%xmm3,%%xmm3 \n" "pshuflw $0xf5,%%xmm3,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movd (%1),%%xmm1 \n" - "lea 0x4(%1),%1 \n" + "movd "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x4,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3578,8 +3586,8 @@ void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x1,%3 \n" - "movd %%xmm0,(%2) \n" - "lea 0x4(%2),%2 \n" + "movd %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x4,2)",%2 \n" "jge 91b \n" "99: \n" : "+r"(src_argb0), // %0 @@ -3631,17 +3639,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, "10: \n" "test $0xf,%2 \n" "je 19f \n" - "movd (%0),%%xmm3 \n" - "lea 0x4(%0),%0 \n" + "movd "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x4,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movd (%1),%%xmm2 \n" + "movd "MEMACCESS(1)",%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movd (%1),%%xmm1 \n" - "lea 0x4(%1),%1 \n" + "movd "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x4,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3650,8 +3658,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x1,%3 \n" - "movd %%xmm0,(%2) \n" - "lea 0x4(%2),%2 \n" + "movd %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x4,2)",%2 \n" "jge 10b \n" "19: \n" @@ -3665,17 +3673,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, // 4 pixel loop. ".p2align 2 \n" "40: \n" - "movdqa (%0),%%xmm3 \n" - "lea 0x10(%0),%0 \n" + "movdqa "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x10,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movdqa (%1),%%xmm2 \n" + "movdqa "MEMACCESS(1)",%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movdqa (%1),%%xmm1 \n" - "lea 0x10(%1),%1 \n" + "movdqa "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x10,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3684,25 +3692,25 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x4,%3 \n" - "movdqa %%xmm0,(%2) \n" - "lea 0x10(%2),%2 \n" + "movdqa %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x10,2)",%2 \n" "jge 40b \n" "jmp 49f \n" // 4 pixel unaligned loop. ".p2align 2 \n" "41: \n" - "movdqu (%0),%%xmm3 \n" - "lea 0x10(%0),%0 \n" + "movdqu "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x10,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movdqu (%1),%%xmm2 \n" + "movdqu "MEMACCESS(1)",%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movdqu (%1),%%xmm1 \n" - "lea 0x10(%1),%1 \n" + "movdqu "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x10,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3711,8 +3719,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x4,%3 \n" - "movdqa %%xmm0,(%2) \n" - "lea 0x10(%2),%2 \n" + "movdqa %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x10,2)",%2 \n" "jge 41b \n" "49: \n" @@ -3721,17 +3729,17 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, // 1 pixel loop. "91: \n" - "movd (%0),%%xmm3 \n" - "lea 0x4(%0),%0 \n" + "movd "MEMACCESS(0)",%%xmm3 \n" + "lea "MEMLEA(0x4,0)",%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movd (%1),%%xmm2 \n" + "movd "MEMACCESS(1)",%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movd (%1),%%xmm1 \n" - "lea 0x4(%1),%1 \n" + "movd "MEMACCESS(1)",%%xmm1 \n" + "lea "MEMLEA(0x4,1)",%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" @@ -3740,8 +3748,8 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "sub $0x1,%3 \n" - "movd %%xmm0,(%2) \n" - "lea 0x4(%2),%2 \n" + "movd %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x4,2)",%2 \n" "jge 91b \n" "99: \n" : "+r"(src_argb0), // %0