diff --git a/README.chromium b/README.chromium index efd8c857f..9a61d93ec 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 321 +Version: 322 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 0bbf2611d..644eb649b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 321 +#define LIBYUV_VERSION 322 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row.h b/source/row.h index db5e7b44b..6973c45ee 100644 --- a/source/row.h +++ b/source/row.h @@ -80,6 +80,9 @@ extern "C" { #define HAS_CUMULATIVESUMTOAVERAGE_SSE2 #define HAS_ARGBSHADE_SSE2 #define HAS_ARGBAFFINEROW_SSE2 +// HAS_ARGBBLENDROW_SSE2 may be faster than the SSSE3 version on some CPUs, so +// enable it here instead of in the !defined(LIBYUV_SSSE3_ONLY) section. +#define HAS_ARGBBLENDROW_SSE2 #endif // The following are Windows only: @@ -96,7 +99,6 @@ extern "C" { !defined(LIBYUV_SSSE3_ONLY) #define HAS_MIRRORROW_SSE2 #define HAS_ARGBATTENUATE_SSE2 -#define HAS_ARGBBLENDROW_SSE2 #endif // The following are available on Neon platforms diff --git a/source/row_posix.cc b/source/row_posix.cc index b25435663..8976fe1b4 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2548,16 +2548,16 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, // 4 pixel loop. ".p2align 2 \n" "40: \n" - "movdqu (%0),%%xmm3 \n" + "movdqa (%0),%%xmm3 \n" "lea 0x10(%0),%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" - "movdqu (%1),%%xmm2 \n" + "movdqa (%1),%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" - "movdqu (%1),%%xmm1 \n" + "movdqa (%1),%%xmm1 \n" "lea 0x10(%1),%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n"