diff --git a/README.chromium b/README.chromium index 8864a09a8..d76316cdc 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 820 +Version: 821 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index e56684bd2..889e398e7 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 820 +#define LIBYUV_VERSION 821 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index e7c4bcebf..03c504e15 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1134,9 +1134,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, } #endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 4 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; if (IS_ALIGNED(width, 4)) { ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; @@ -1191,9 +1189,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) = ARGBUnattenuateRow_C; #if defined(HAS_ARGBUNATTENUATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; diff --git a/source/row_posix.cc b/source/row_posix.cc index 01bb29e44..1ed20f41b 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -4117,17 +4117,17 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { // 4 pixel loop. ".p2align 4 \n" "1: \n" - "movdqa "MEMACCESS(0)",%%xmm0 \n" + "movdqu "MEMACCESS(0)",%%xmm0 \n" "pshufb %%xmm4,%%xmm0 \n" - "movdqa "MEMACCESS(0)",%%xmm1 \n" + "movdqu "MEMACCESS(0)",%%xmm1 \n" "punpcklbw %%xmm1,%%xmm1 \n" "pmulhuw %%xmm1,%%xmm0 \n" - "movdqa "MEMACCESS(0)",%%xmm1 \n" + "movdqu "MEMACCESS(0)",%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n" - "movdqa "MEMACCESS(0)",%%xmm2 \n" + "movdqu "MEMACCESS(0)",%%xmm2 \n" "punpckhbw %%xmm2,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm1 \n" - "movdqa "MEMACCESS(0)",%%xmm2 \n" + "movdqu "MEMACCESS(0)",%%xmm2 \n" "lea "MEMLEA(0x10,0)",%0 \n" "pand %%xmm3,%%xmm2 \n" "psrlw $0x8,%%xmm0 \n" @@ -4135,7 +4135,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { "packuswb %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "sub $0x4,%2 \n" - "movdqa %%xmm0,"MEMACCESS(1)" \n" + "movdqu %%xmm0,"MEMACCESS(1)" \n" "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" : "+r"(src_argb), // %0 @@ -4161,7 +4161,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, // 4 pixel loop. ".p2align 4 \n" "1: \n" - "movdqa "MEMACCESS(0)",%%xmm0 \n" + "movdqu "MEMACCESS(0)",%%xmm0 \n" "movzb "MEMACCESS2(0x03,0)",%3 \n" "punpcklbw %%xmm0,%%xmm0 \n" MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 @@ -4171,7 +4171,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, "pshuflw $0x40,%%xmm3,%%xmm3 \n" "movlhps %%xmm3,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm0 \n" - "movdqa "MEMACCESS(0)",%%xmm1 \n" + "movdqu "MEMACCESS(0)",%%xmm1 \n" "movzb "MEMACCESS2(0x0b,0)",%3 \n" "punpckhbw %%xmm1,%%xmm1 \n" MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 @@ -4184,7 +4184,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, "lea "MEMLEA(0x10,0)",%0 \n" "packuswb %%xmm1,%%xmm0 \n" "sub $0x4,%2 \n" - "movdqa %%xmm0,"MEMACCESS(1)" \n" + "movdqu %%xmm0,"MEMACCESS(1)" \n" "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" : "+r"(src_argb), // %0 diff --git a/source/row_win.cc b/source/row_win.cc index 86d91262f..1f232a83c 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -4586,7 +4586,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm7, xmm7 // generate constant 1 + pcmpeqb xmm7, xmm7 // generate constant 0x0001 psrlw xmm7, 15 pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff psrlw xmm6, 8 @@ -4788,17 +4788,17 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { align 16 convertloop: - movdqa xmm0, [eax] // read 4 pixels + movdqu xmm0, [eax] // read 4 pixels pshufb xmm0, xmm4 // isolate first 2 alphas - movdqa xmm1, [eax] // read 4 pixels + movdqu xmm1, [eax] // read 4 pixels punpcklbw xmm1, xmm1 // first 2 pixel rgbs pmulhuw xmm0, xmm1 // rgb * a - movdqa xmm1, [eax] // read 4 pixels + movdqu xmm1, [eax] // read 4 pixels pshufb xmm1, xmm5 // isolate next 2 alphas - movdqa xmm2, [eax] // read 4 pixels + movdqu xmm2, [eax] // read 4 pixels punpckhbw xmm2, xmm2 // next 2 pixel rgbs pmulhuw xmm1, xmm2 // rgb * a - movdqa xmm2, [eax] // mask original alpha + movdqu xmm2, [eax] // mask original alpha lea eax, [eax + 16] pand xmm2, xmm3 psrlw xmm0, 8 @@ -4806,7 +4806,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { packuswb xmm0, xmm1 por xmm0, xmm2 // copy original alpha sub ecx, 4 - movdqa [edx], xmm0 + movdqu [edx], xmm0 lea edx, [edx + 16] jg convertloop @@ -4874,7 +4874,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, align 16 convertloop: - movdqa xmm0, [eax] // read 4 pixels + movdqu xmm0, [eax] // read 4 pixels movzx esi, byte ptr [eax + 3] // first alpha movzx edi, byte ptr [eax + 7] // second alpha punpcklbw xmm0, xmm0 // first 2 @@ -4885,7 +4885,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, movlhps xmm2, xmm3 pmulhuw xmm0, xmm2 // rgb * a - movdqa xmm1, [eax] // read 4 pixels + movdqu xmm1, [eax] // read 4 pixels movzx esi, byte ptr [eax + 11] // third alpha movzx edi, byte ptr [eax + 15] // forth alpha punpckhbw xmm1, xmm1 // next 2 @@ -4899,7 +4899,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, packuswb xmm0, xmm1 sub ecx, 4 - movdqa [edx], xmm0 + movdqu [edx], xmm0 lea edx, [edx + 16] jg convertloop pop edi