diff --git a/README.chromium b/README.chromium index 511f9bae0..980a40c78 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 744 +Version: 745 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index dcb11c6dd..92c828846 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -39,6 +39,7 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_ARGBBLENDROW_SSSE3 +#define HAS_ARGBATTENUATEROW_SSSE3 #endif // The following are available on all x86 platforms except NaCL x64: @@ -114,7 +115,6 @@ extern "C" { // Effects #define HAS_ARGBADDROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2 -#define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBMIRRORROW_SSSE3 @@ -188,8 +188,8 @@ extern "C" { !defined(LIBYUV_SSSE3_ONLY) // Available with NaCL: #define HAS_ARGBBLENDROW_SSE2 -#if !(defined(__native_client__) && defined(__x86_64__)) #define HAS_ARGBATTENUATEROW_SSE2 +#if !(defined(__native_client__) && defined(__x86_64__)) #define HAS_MIRRORROW_SSE2 #endif #endif diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 178f82217..5e853e981 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 744 +#define LIBYUV_VERSION 745 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index 1c05913bc..239731a17 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -3770,7 +3770,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, // aligned to 16 bytes void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { asm volatile ( - "sub %0,%1 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "pslld $0x18,%%xmm4 \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -3779,17 +3778,18 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { // 4 pixel loop. ".p2align 4 \n" "1: \n" - "movdqa (%0),%%xmm0 \n" + "movdqa "MEMACCESS(0)",%%xmm0 \n" "punpcklbw %%xmm0,%%xmm0 \n" "pshufhw $0xff,%%xmm0,%%xmm2 \n" "pshuflw $0xff,%%xmm2,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm0 \n" - "movdqa (%0),%%xmm1 \n" + "movdqa "MEMACCESS(0)",%%xmm1 \n" "punpckhbw %%xmm1,%%xmm1 \n" "pshufhw $0xff,%%xmm1,%%xmm2 \n" "pshuflw $0xff,%%xmm2,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm1 \n" - "movdqa (%0),%%xmm2 \n" + "movdqa "MEMACCESS(0)",%%xmm2 \n" + "lea "MEMLEA(0x10,0)",%0 \n" "psrlw $0x8,%%xmm0 \n" "pand %%xmm4,%%xmm2 \n" "psrlw $0x8,%%xmm1 \n" @@ -3797,8 +3797,8 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { "pand %%xmm5,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "sub $0x4,%2 \n" - "movdqa %%xmm0,(%0,%1,1) \n" - "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,"MEMACCESS(1)" \n" + "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 @@ -3825,7 +3825,6 @@ static uvec8 kShuffleAlpha1 = { // aligned to 16 bytes void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { asm volatile ( - "sub %0,%1 \n" "pcmpeqb %%xmm3,%%xmm3 \n" "pslld $0x18,%%xmm3 \n" "movdqa %3,%%xmm4 \n" @@ -3834,25 +3833,26 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { // 4 pixel loop. ".p2align 4 \n" "1: \n" - "movdqa (%0),%%xmm0 \n" + "movdqa "MEMACCESS(0)",%%xmm0 \n" "pshufb %%xmm4,%%xmm0 \n" - "movdqa (%0),%%xmm1 \n" + "movdqa "MEMACCESS(0)",%%xmm1 \n" "punpcklbw %%xmm1,%%xmm1 \n" "pmulhuw %%xmm1,%%xmm0 \n" - "movdqa (%0),%%xmm1 \n" + "movdqa "MEMACCESS(0)",%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n" - "movdqa (%0),%%xmm2 \n" + "movdqa "MEMACCESS(0)",%%xmm2 \n" "punpckhbw %%xmm2,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm1 \n" - "movdqa (%0),%%xmm2 \n" + "movdqa "MEMACCESS(0)",%%xmm2 \n" + "lea "MEMLEA(0x10,0)",%0 \n" "pand %%xmm3,%%xmm2 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "sub $0x4,%2 \n" - "movdqa %%xmm0,(%0,%1,1) \n" - "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,"MEMACCESS(1)" \n" + "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 diff --git a/source/row_win.cc b/source/row_win.cc index 2dcaba4fe..a255293b0 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -4624,7 +4624,6 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { mov eax, [esp + 4] // src_argb0 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width - sub edx, eax pcmpeqb xmm4, xmm4 // generate mask 0xff000000 pslld xmm4, 24 pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff @@ -4643,6 +4642,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { pshuflw xmm2, xmm2, 0FFh pmulhuw xmm1, xmm2 // rgb * a movdqa xmm2, [eax] // alphas + lea eax, [eax + 16] psrlw xmm0, 8 pand xmm2, xmm4 psrlw xmm1, 8 @@ -4650,8 +4650,8 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { pand xmm0, xmm5 // keep original alphas por xmm0, xmm2 sub ecx, 4 - movdqa [eax + edx], xmm0 - lea eax, [eax + 16] + movdqa [edx], xmm0 + lea edx, [edx + 16] jg convertloop ret @@ -4674,7 +4674,6 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { mov eax, [esp + 4] // src_argb0 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width - sub edx, eax pcmpeqb xmm3, xmm3 // generate mask 0xff000000 pslld xmm3, 24 movdqa xmm4, kShuffleAlpha0 @@ -4693,14 +4692,15 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { punpckhbw xmm2, xmm2 // next 2 pixel rgbs pmulhuw xmm1, xmm2 // rgb * a movdqa xmm2, [eax] // mask original alpha + lea eax, [eax + 16] pand xmm2, xmm3 psrlw xmm0, 8 psrlw xmm1, 8 packuswb xmm0, xmm1 por xmm0, xmm2 // copy original alpha sub ecx, 4 - movdqa [eax + edx], xmm0 - lea eax, [eax + 16] + movdqa [edx], xmm0 + lea edx, [edx + 16] jg convertloop ret