mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
Use a pointer to the inverse alpha table (fixed_invtbl8) for clang-cl
R=harryjin@google.com TBR=harryjin@google.com BUG=none Review URL: https://webrtc-codereview.appspot.com/54859004.
This commit is contained in:
parent
5452cce452
commit
3c4f5735ce
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1473
|
Version: 1474
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1473
|
#define LIBYUV_VERSION 1474
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -4331,19 +4331,21 @@ __declspec(naked)
|
|||||||
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
|
push ebx
|
||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
mov eax, [esp + 8 + 4] // src_argb0
|
mov eax, [esp + 12 + 4] // src_argb
|
||||||
mov edx, [esp + 8 + 8] // dst_argb
|
mov edx, [esp + 12 + 8] // dst_argb
|
||||||
mov ecx, [esp + 8 + 12] // width
|
mov ecx, [esp + 12 + 12] // width
|
||||||
|
lea ebx, fixed_invtbl8
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
movdqu xmm0, [eax] // read 4 pixels
|
movdqu xmm0, [eax] // read 4 pixels
|
||||||
movzx esi, byte ptr [eax + 3] // first alpha
|
movzx esi, byte ptr [eax + 3] // first alpha
|
||||||
movzx edi, byte ptr [eax + 7] // second alpha
|
movzx edi, byte ptr [eax + 7] // second alpha
|
||||||
punpcklbw xmm0, xmm0 // first 2
|
punpcklbw xmm0, xmm0 // first 2
|
||||||
movd xmm2, dword ptr [fixed_invtbl8 + esi * 4]
|
movd xmm2, dword ptr [ebx + esi * 4]
|
||||||
movd xmm3, dword ptr [fixed_invtbl8 + edi * 4]
|
movd xmm3, dword ptr [ebx + edi * 4]
|
||||||
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
|
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
|
||||||
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
|
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
|
||||||
movlhps xmm2, xmm3
|
movlhps xmm2, xmm3
|
||||||
@ -4353,21 +4355,22 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
|||||||
movzx esi, byte ptr [eax + 11] // third alpha
|
movzx esi, byte ptr [eax + 11] // third alpha
|
||||||
movzx edi, byte ptr [eax + 15] // fourth alpha
|
movzx edi, byte ptr [eax + 15] // fourth alpha
|
||||||
punpckhbw xmm1, xmm1 // next 2
|
punpckhbw xmm1, xmm1 // next 2
|
||||||
movd xmm2, dword ptr [fixed_invtbl8 + esi * 4]
|
movd xmm2, dword ptr [ebx + esi * 4]
|
||||||
movd xmm3, dword ptr [fixed_invtbl8 + edi * 4]
|
movd xmm3, dword ptr [ebx + edi * 4]
|
||||||
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
|
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
|
||||||
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
|
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
|
||||||
movlhps xmm2, xmm3
|
movlhps xmm2, xmm3
|
||||||
pmulhuw xmm1, xmm2 // rgb * a
|
pmulhuw xmm1, xmm2 // rgb * a
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
|
|
||||||
packuswb xmm0, xmm1
|
packuswb xmm0, xmm1
|
||||||
movdqu [edx], xmm0
|
movdqu [edx], xmm0
|
||||||
lea edx, [edx + 16]
|
lea edx, [edx + 16]
|
||||||
sub ecx, 4
|
sub ecx, 4
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
pop edi
|
pop edi
|
||||||
pop esi
|
pop esi
|
||||||
|
pop ebx
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4420,36 +4423,37 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
|
|||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
|
|
||||||
mov eax, [esp + 4] // src_argb0
|
push ebx
|
||||||
mov edx, [esp + 8] // dst_argb
|
|
||||||
mov ecx, [esp + 12] // width
|
|
||||||
sub edx, eax
|
|
||||||
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
|
|
||||||
|
|
||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
|
mov eax, [esp + 12 + 4] // src_argb
|
||||||
|
mov edx, [esp + 12 + 8] // dst_argb
|
||||||
|
mov ecx, [esp + 12 + 12] // width
|
||||||
|
sub edx, eax
|
||||||
|
lea ebx, fixed_invtbl8
|
||||||
|
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
// replace VPGATHER
|
// replace VPGATHER
|
||||||
movzx esi, byte ptr [eax + 3] // alpha0
|
movzx esi, byte ptr [eax + 3] // alpha0
|
||||||
movzx edi, byte ptr [eax + 7] // alpha1
|
movzx edi, byte ptr [eax + 7] // alpha1
|
||||||
vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a0]
|
vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0]
|
||||||
vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a1]
|
vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
|
||||||
movzx esi, byte ptr [eax + 11] // alpha2
|
movzx esi, byte ptr [eax + 11] // alpha2
|
||||||
movzx edi, byte ptr [eax + 15] // alpha3
|
movzx edi, byte ptr [eax + 15] // alpha3
|
||||||
vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
|
vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
|
||||||
vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a2]
|
vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2]
|
||||||
vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a3]
|
vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3]
|
||||||
movzx esi, byte ptr [eax + 19] // alpha4
|
movzx esi, byte ptr [eax + 19] // alpha4
|
||||||
movzx edi, byte ptr [eax + 23] // alpha5
|
movzx edi, byte ptr [eax + 23] // alpha5
|
||||||
vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
|
vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
|
||||||
vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a4]
|
vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4]
|
||||||
vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a5]
|
vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
|
||||||
movzx esi, byte ptr [eax + 27] // alpha6
|
movzx esi, byte ptr [eax + 27] // alpha6
|
||||||
movzx edi, byte ptr [eax + 31] // alpha7
|
movzx edi, byte ptr [eax + 31] // alpha7
|
||||||
vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
|
vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
|
||||||
vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a6]
|
vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6]
|
||||||
vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a7]
|
vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7]
|
||||||
vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
|
vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
|
||||||
vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
|
vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
|
||||||
vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
|
vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
|
||||||
@ -4473,6 +4477,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
|
|||||||
|
|
||||||
pop edi
|
pop edi
|
||||||
pop esi
|
pop esi
|
||||||
|
pop ebx
|
||||||
vzeroupper
|
vzeroupper
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user