mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
alpha blend last pixel fix
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/439008
git-svn-id: http://libyuv.googlecode.com/svn/trunk@210 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
90310ddbb9
commit
976423feca
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 209
|
Version: 210
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 209
|
#define LIBYUV_VERSION 210
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
|
|||||||
@ -218,10 +218,10 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
|||||||
// loop will run one extra time.
|
// loop will run one extra time.
|
||||||
"sub %2, #16 \n"
|
"sub %2, #16 \n"
|
||||||
|
|
||||||
// mirror the bytes in the 64 bit segments. unable to mirror
|
// mirror the bytes in the 64 bit segments. unable to mirror
|
||||||
// the bytes in the entire 128 bits in one go.
|
// the bytes in the entire 128 bits in one go.
|
||||||
// because of the inability to mirror the entire 128 bits
|
// because of the inability to mirror the entire 128 bits
|
||||||
// mirror the writing out of the two 64 bit segments.
|
// mirror the writing out of the two 64 bit segments.
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.8 {q0}, [%0]! \n" // src += 16
|
"vld1.8 {q0}, [%0]! \n" // src += 16
|
||||||
"vrev64.8 q0, q0 \n"
|
"vrev64.8 q0, q0 \n"
|
||||||
|
|||||||
@ -1931,29 +1931,29 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
"sub %0,%1 \n"
|
"sub %0,%1 \n"
|
||||||
"mov (%0),%3 \n"
|
"mov (%0),%3 \n"
|
||||||
"sub $0x1,%2 \n"
|
"sub $0x1,%2 \n"
|
||||||
"je 8f \n" // last1
|
"jle 8f \n" // last1
|
||||||
"cmp $0xff000000,%3 \n"
|
"cmp $0xff000000,%3 \n"
|
||||||
"jae 2f \n" // opaqueloop
|
"jae 2f \n" // opaqueloop
|
||||||
"cmp $0xffffff,%3 \n"
|
"cmp $0xffffff,%3 \n"
|
||||||
"ja 3f \n" // translucientloop
|
"ja 3f \n" // translucentloop
|
||||||
|
|
||||||
// transparentloop
|
// transparentloop
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"sub $0x1,%2 \n"
|
"sub $0x1,%2 \n"
|
||||||
"lea 0x4(%0),%0 \n"
|
"lea 0x4(%0),%0 \n"
|
||||||
"je 8f \n" // last1
|
"jle 8f \n" // last1
|
||||||
"mov (%0),%3 \n"
|
"mov (%0),%3 \n"
|
||||||
"cmp $0xffffff,%3 \n"
|
"cmp $0xffffff,%3 \n"
|
||||||
"jbe 1b \n" // transparentloop
|
"jbe 1b \n" // transparentloop
|
||||||
"cmp $0xff000000,%3 \n"
|
"cmp $0xff000000,%3 \n"
|
||||||
"jb 3f \n" // translucientloop
|
"jb 3f \n" // translucentloop
|
||||||
|
|
||||||
// opaqueloop
|
// opaqueloop
|
||||||
"2: \n"
|
"2: \n"
|
||||||
"mov %3,(%0,%1,1) \n"
|
"mov %3,(%0,%1,1) \n"
|
||||||
"lea 0x4(%0),%0 \n"
|
"lea 0x4(%0),%0 \n"
|
||||||
"sub $0x1,%2 \n"
|
"sub $0x1,%2 \n"
|
||||||
"je 8f \n" // last1
|
"jle 8f \n" // last1
|
||||||
"mov (%0),%3 \n"
|
"mov (%0),%3 \n"
|
||||||
"cmp $0xff000000,%3 \n"
|
"cmp $0xff000000,%3 \n"
|
||||||
"jae 2b \n" // opaqueloop
|
"jae 2b \n" // opaqueloop
|
||||||
@ -1961,48 +1961,50 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
"jbe 1b \n" // transparentloop
|
"jbe 1b \n" // transparentloop
|
||||||
"nop \n"
|
"nop \n"
|
||||||
|
|
||||||
// translucientloop
|
// translucentloop
|
||||||
"3: \n"
|
"3: \n"
|
||||||
"movq (%0),%%xmm0 \n"
|
|
||||||
"movq (%0,%1,1),%%xmm1 \n"
|
|
||||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
|
||||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
|
||||||
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
|
||||||
"pshufhw $0xff,%%xmm2,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm2,%%xmm3 \n"
|
|
||||||
"pxor %%xmm4,%%xmm3 \n"
|
|
||||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
|
||||||
"pmulhuw %%xmm3,%%xmm1 \n"
|
|
||||||
"paddw %%xmm1,%%xmm0 \n"
|
|
||||||
"psrlw $0x8,%%xmm0 \n"
|
|
||||||
"packuswb %%xmm0,%%xmm0 \n"
|
|
||||||
"movq %%xmm0,(%0,%1,1) \n"
|
|
||||||
"lea 0x8(%0),%0 \n"
|
|
||||||
"sub $0x2,%2 \n"
|
|
||||||
"jbe 8f \n" // last1
|
|
||||||
"mov (%0),%3 \n"
|
|
||||||
"cmp $0xffffff,%3 \n"
|
|
||||||
"jbe 1b \n" // transparentloop
|
|
||||||
"cmp $0xff000000,%3 \n"
|
|
||||||
"jb 3b \n" // translucientloop
|
|
||||||
"jmp 2b \n" // opaqueloop
|
|
||||||
|
|
||||||
// last1
|
|
||||||
"8: \n"
|
|
||||||
"add $0x1,%2 \n"
|
|
||||||
"je 9f \n" // done
|
|
||||||
"movd %3,%%xmm0 \n"
|
"movd %3,%%xmm0 \n"
|
||||||
"mov (%0,%1,1),%3 \n"
|
"mov (%0,%1,1),%3 \n"
|
||||||
"movd %3,%%xmm1 \n"
|
"movd %3,%%xmm1 \n"
|
||||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||||
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
||||||
"pshufhw $0xff,%%xmm2,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm2,%%xmm3 \n"
|
"movdqa %%xmm2,%%xmm3 \n"
|
||||||
"pxor %%xmm4,%%xmm3 \n"
|
"pxor %%xmm4,%%xmm3 \n"
|
||||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||||
"pmulhuw %%xmm3,%%xmm1 \n"
|
"pmulhuw %%xmm3,%%xmm1 \n"
|
||||||
"paddw %%xmm1,%%xmm0 \n"
|
"paddusw %%xmm1,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
|
"movd %%xmm0,%3 \n"
|
||||||
|
"mov %3,(%0,%1,1) \n"
|
||||||
|
"lea 0x8(%0),%0 \n"
|
||||||
|
"sub $0x2,%2 \n"
|
||||||
|
"jle 8f \n" // last1
|
||||||
|
"mov (%0),%3 \n"
|
||||||
|
"cmp $0xffffff,%3 \n"
|
||||||
|
"jbe 1b \n" // transparentloop
|
||||||
|
"cmp $0xff000000,%3 \n"
|
||||||
|
"jb 3b \n" // translucentloop
|
||||||
|
"jmp 2b \n" // opaqueloop
|
||||||
|
|
||||||
|
// last1
|
||||||
|
"8: \n"
|
||||||
|
"add $0x1,%2 \n" // 1 pixel left?
|
||||||
|
"cmp $0x1,%2 \n"
|
||||||
|
"jl 9f \n" // done
|
||||||
|
"mov (%0),%3 \n"
|
||||||
|
"movd %3,%%xmm0 \n"
|
||||||
|
"mov (%0,%1,1),%3 \n"
|
||||||
|
"movd %3,%%xmm1 \n"
|
||||||
|
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||||
|
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||||
|
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
||||||
|
"movdqa %%xmm2,%%xmm3 \n"
|
||||||
|
"pxor %%xmm4,%%xmm3 \n"
|
||||||
|
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||||
|
"pmulhuw %%xmm3,%%xmm1 \n"
|
||||||
|
"paddusw %%xmm1,%%xmm0 \n"
|
||||||
"psrlw $0x8,%%xmm0 \n"
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
"packuswb %%xmm0,%%xmm0 \n"
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
"movd %%xmm0,%3 \n"
|
"movd %%xmm0,%3 \n"
|
||||||
|
|||||||
@ -477,7 +477,6 @@ __asm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(fbarchard): Port to gcc
|
|
||||||
__declspec(naked)
|
__declspec(naked)
|
||||||
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
@ -1965,40 +1964,42 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
mov edx, [esp + 4 + 8] // dst_argb
|
mov edx, [esp + 4 + 8] // dst_argb
|
||||||
mov ecx, [esp + 4 + 12] // width
|
mov ecx, [esp + 4 + 12] // width
|
||||||
pcmpeqb xmm4, xmm4 // generate 0xffffffff do negative alpha
|
pcmpeqb xmm4, xmm4 // generate 0xffffffff do negative alpha
|
||||||
|
pcmpeqb xmm5, xmm5 // generate 0xff000000 for alpha
|
||||||
|
pslld xmm5, 24
|
||||||
sub edx, esi
|
sub edx, esi
|
||||||
mov eax, [esi] // get first pixel
|
mov eax, [esi] // get first pixel
|
||||||
sub ecx, 1 // ensure there are at least 2 pixels
|
sub ecx, 1 // ensure there are at least 2 pixels
|
||||||
je last1 // last pixel?
|
jle last1 // last pixel?
|
||||||
cmp eax, 0xFF000000 // opaque?
|
cmp eax, 0xFF000000 // opaque?
|
||||||
jae opaqueloop
|
jae opaqueloop
|
||||||
cmp eax, 0x00FFFFFF // translucient?
|
cmp eax, 0x00FFFFFF // translucent?
|
||||||
ja translucientloop
|
ja translucentloop
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
transparentloop:
|
transparentloop:
|
||||||
sub ecx, 1
|
sub ecx, 1
|
||||||
lea esi, [esi + 4]
|
lea esi, [esi + 4]
|
||||||
je last1
|
jle last1
|
||||||
mov eax, [esi] // handle remaining pixel
|
mov eax, [esi] // get next pixel
|
||||||
cmp eax, 0x00FFFFFF // transparent?
|
cmp eax, 0x00FFFFFF // transparent?
|
||||||
jbe transparentloop
|
jbe transparentloop
|
||||||
cmp eax, 0xFF000000 // translucient?
|
cmp eax, 0xFF000000 // translucent?
|
||||||
jb translucientloop
|
jb translucentloop
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
opaqueloop:
|
opaqueloop:
|
||||||
mov dword ptr [esi + edx], eax
|
mov dword ptr [esi + edx], eax
|
||||||
lea esi, [esi + 4]
|
lea esi, [esi + 4]
|
||||||
sub ecx, 1
|
sub ecx, 1
|
||||||
je last1
|
jle last1
|
||||||
mov eax, [esi] // handle remaining pixel
|
mov eax, [esi] // get next pixel
|
||||||
cmp eax, 0xFF000000 // opaque?
|
cmp eax, 0xFF000000 // opaque?
|
||||||
jae opaqueloop
|
jae opaqueloop
|
||||||
cmp eax, 0x00FFFFFF // transparent?
|
cmp eax, 0x00FFFFFF // transparent?
|
||||||
jbe transparentloop
|
jbe transparentloop
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
translucientloop:
|
translucentloop:
|
||||||
movq xmm0, qword ptr [esi] // fetch 2 pixels
|
movq xmm0, qword ptr [esi] // fetch 2 pixels
|
||||||
movq xmm1, qword ptr [esi + edx]
|
movq xmm1, qword ptr [esi + edx]
|
||||||
punpcklbw xmm0, xmm0 // src 16 bits
|
punpcklbw xmm0, xmm0 // src 16 bits
|
||||||
@ -2009,39 +2010,42 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
pxor xmm3, xmm4
|
pxor xmm3, xmm4
|
||||||
pmulhuw xmm0, xmm2 // src * a
|
pmulhuw xmm0, xmm2 // src * a
|
||||||
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
|
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
|
||||||
paddw xmm0, xmm1
|
paddusw xmm0, xmm1
|
||||||
psrlw xmm0, 8
|
psrlw xmm0, 8
|
||||||
packuswb xmm0, xmm0 // pack 2 pixels
|
packuswb xmm0, xmm0 // pack 2 pixels
|
||||||
|
por xmm0, xmm5 // set alpha
|
||||||
movq qword ptr [esi + edx], xmm0
|
movq qword ptr [esi + edx], xmm0
|
||||||
lea esi, [esi + 8]
|
lea esi, [esi + 8]
|
||||||
sub ecx, 2
|
sub ecx, 2
|
||||||
jbe last1
|
jle last1
|
||||||
mov eax, [esi] // handle remaining pixel
|
mov eax, [esi]
|
||||||
cmp eax, 0x00FFFFFF // transparent?
|
cmp eax, 0x00FFFFFF // transparent?
|
||||||
jbe transparentloop
|
jbe transparentloop
|
||||||
cmp eax, 0xFF000000 // translucient?
|
cmp eax, 0xFF000000 // translucent?
|
||||||
jb translucientloop
|
jb translucentloop
|
||||||
jmp opaqueloop
|
jmp opaqueloop
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
last1:
|
last1:
|
||||||
add ecx, 1
|
add ecx, 1
|
||||||
je done
|
cmp ecx, 1 // 1 left?
|
||||||
|
jl done
|
||||||
|
|
||||||
|
mov eax, [esi] // get next pixel
|
||||||
movd xmm0, eax
|
movd xmm0, eax
|
||||||
mov eax, [esi + edx]
|
mov eax, [esi + edx]
|
||||||
movd xmm1, eax
|
movd xmm1, eax
|
||||||
punpcklbw xmm0, xmm0 // src 16 bits
|
punpcklbw xmm0, xmm0 // src 16 bits
|
||||||
punpcklbw xmm1, xmm1 // dst 16 bits
|
punpcklbw xmm1, xmm1 // dst 16 bits
|
||||||
pshuflw xmm2, xmm0, 0xff // src alpha
|
pshuflw xmm2, xmm0, 0xff // src alpha
|
||||||
pshufhw xmm2, xmm2, 0xff
|
|
||||||
movdqa xmm3, xmm2 // dst alpha
|
movdqa xmm3, xmm2 // dst alpha
|
||||||
pxor xmm3, xmm4
|
pxor xmm3, xmm4
|
||||||
pmulhuw xmm0, xmm2 // src * a
|
pmulhuw xmm0, xmm2 // src * a
|
||||||
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
|
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
|
||||||
paddw xmm0, xmm1
|
paddusw xmm0, xmm1
|
||||||
psrlw xmm0, 8
|
psrlw xmm0, 8
|
||||||
packuswb xmm0, xmm0 // pack to bytes
|
packuswb xmm0, xmm0 // pack to bytes
|
||||||
|
por xmm0, xmm5 // set alpha
|
||||||
movd eax, xmm0
|
movd eax, xmm0
|
||||||
mov dword ptr [esi + edx], eax
|
mov dword ptr [esi + edx], eax
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user