diff --git a/README.chromium b/README.chromium index e54b46899..6446aa1a9 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1473 +Version: 1474 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 5ed7b2aa3..adf5d242b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1473 +#define LIBYUV_VERSION 1474 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_win.cc b/source/row_win.cc index 0e00dbeb4..50cd2f874 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -4331,19 +4331,21 @@ __declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { + push ebx push esi push edi - mov eax, [esp + 8 + 4] // src_argb0 - mov edx, [esp + 8 + 8] // dst_argb - mov ecx, [esp + 8 + 12] // width + mov eax, [esp + 12 + 4] // src_argb + mov edx, [esp + 12 + 8] // dst_argb + mov ecx, [esp + 12 + 12] // width + lea ebx, fixed_invtbl8 convertloop: movdqu xmm0, [eax] // read 4 pixels movzx esi, byte ptr [eax + 3] // first alpha movzx edi, byte ptr [eax + 7] // second alpha punpcklbw xmm0, xmm0 // first 2 - movd xmm2, dword ptr [fixed_invtbl8 + esi * 4] - movd xmm3, dword ptr [fixed_invtbl8 + edi * 4] + movd xmm2, dword ptr [ebx + esi * 4] + movd xmm3, dword ptr [ebx + edi * 4] pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words movlhps xmm2, xmm3 @@ -4353,21 +4355,22 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, movzx esi, byte ptr [eax + 11] // third alpha movzx edi, byte ptr [eax + 15] // forth alpha punpckhbw xmm1, xmm1 // next 2 - movd xmm2, dword ptr [fixed_invtbl8 + esi * 4] - movd xmm3, dword ptr [fixed_invtbl8 + edi * 4] + movd xmm2, dword ptr [ebx + esi * 4] + movd xmm3, dword ptr [ebx + edi * 4] pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words movlhps xmm2, xmm3 pmulhuw xmm1, xmm2 // rgb * a lea eax, [eax + 16] - packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg convertloop + pop edi pop esi + pop ebx ret } } @@ -4420,36 +4423,37 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - sub edx, eax - vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2 - + push ebx push esi push edi + mov eax, [esp + 12 + 4] // src_argb + mov edx, [esp + 12 + 8] // dst_argb + mov ecx, [esp + 12 + 12] // width + sub edx, eax + lea ebx, fixed_invtbl8 + vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2 convertloop: // replace VPGATHER movzx esi, byte ptr [eax + 3] // alpha0 movzx edi, byte ptr [eax + 7] // alpha1 - vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a0] - vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a1] + vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0] + vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1] movzx esi, byte ptr [eax + 11] // alpha2 movzx edi, byte ptr [eax + 15] // alpha3 vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0] - vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a2] - vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a3] + vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2] + vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3] movzx esi, byte ptr [eax + 19] // alpha4 movzx edi, byte ptr [eax + 23] // alpha5 vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2] - vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a4] - vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a5] + vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4] + vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5] movzx esi, byte ptr [eax + 27] // alpha6 movzx edi, byte ptr [eax + 31] // alpha7 vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4] - vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a6] - vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a7] + vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6] + vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7] vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6] vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0] vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4] @@ -4473,6 +4477,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, pop edi pop esi + pop ebx vzeroupper ret }