mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
alpha blend last pixel fix
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/439008 git-svn-id: http://libyuv.googlecode.com/svn/trunk@210 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
90310ddbb9
commit
976423feca
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 209
|
||||
Version: 210
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 209
|
||||
#define LIBYUV_VERSION 210
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
|
||||
@ -218,10 +218,10 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
// loop will run one extra time.
|
||||
"sub %2, #16 \n"
|
||||
|
||||
// mirror the bytes in the 64 bit segments. unable to mirror
|
||||
// mirror the bytes in the 64 bit segments. unable to mirror
|
||||
// the bytes in the entire 128 bits in one go.
|
||||
// because of the inability to mirror the entire 128 bits
|
||||
// mirror the writing out of the two 64 bit segments.
|
||||
// mirror the writing out of the two 64 bit segments.
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // src += 16
|
||||
"vrev64.8 q0, q0 \n"
|
||||
|
||||
@ -1931,29 +1931,29 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
"sub %0,%1 \n"
|
||||
"mov (%0),%3 \n"
|
||||
"sub $0x1,%2 \n"
|
||||
"je 8f \n" // last1
|
||||
"jle 8f \n" // last1
|
||||
"cmp $0xff000000,%3 \n"
|
||||
"jae 2f \n" // opaqueloop
|
||||
"cmp $0xffffff,%3 \n"
|
||||
"ja 3f \n" // translucientloop
|
||||
"ja 3f \n" // translucentloop
|
||||
|
||||
// transparentloop
|
||||
"1: \n"
|
||||
"sub $0x1,%2 \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"je 8f \n" // last1
|
||||
"jle 8f \n" // last1
|
||||
"mov (%0),%3 \n"
|
||||
"cmp $0xffffff,%3 \n"
|
||||
"jbe 1b \n" // transparentloop
|
||||
"cmp $0xff000000,%3 \n"
|
||||
"jb 3f \n" // translucientloop
|
||||
"jb 3f \n" // translucentloop
|
||||
|
||||
// opaqueloop
|
||||
"2: \n"
|
||||
"mov %3,(%0,%1,1) \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"sub $0x1,%2 \n"
|
||||
"je 8f \n" // last1
|
||||
"jle 8f \n" // last1
|
||||
"mov (%0),%3 \n"
|
||||
"cmp $0xff000000,%3 \n"
|
||||
"jae 2b \n" // opaqueloop
|
||||
@ -1961,48 +1961,50 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
"jbe 1b \n" // transparentloop
|
||||
"nop \n"
|
||||
|
||||
// translucientloop
|
||||
// translucentloop
|
||||
"3: \n"
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%1,1),%%xmm1 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
||||
"pshufhw $0xff,%%xmm2,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"pmulhuw %%xmm3,%%xmm1 \n"
|
||||
"paddw %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movq %%xmm0,(%0,%1,1) \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"sub $0x2,%2 \n"
|
||||
"jbe 8f \n" // last1
|
||||
"mov (%0),%3 \n"
|
||||
"cmp $0xffffff,%3 \n"
|
||||
"jbe 1b \n" // transparentloop
|
||||
"cmp $0xff000000,%3 \n"
|
||||
"jb 3b \n" // translucientloop
|
||||
"jmp 2b \n" // opaqueloop
|
||||
|
||||
// last1
|
||||
"8: \n"
|
||||
"add $0x1,%2 \n"
|
||||
"je 9f \n" // done
|
||||
"movd %3,%%xmm0 \n"
|
||||
"mov (%0,%1,1),%3 \n"
|
||||
"movd %3,%%xmm1 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
||||
"pshufhw $0xff,%%xmm2,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"pmulhuw %%xmm3,%%xmm1 \n"
|
||||
"paddw %%xmm1,%%xmm0 \n"
|
||||
"paddusw %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
"mov %3,(%0,%1,1) \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"sub $0x2,%2 \n"
|
||||
"jle 8f \n" // last1
|
||||
"mov (%0),%3 \n"
|
||||
"cmp $0xffffff,%3 \n"
|
||||
"jbe 1b \n" // transparentloop
|
||||
"cmp $0xff000000,%3 \n"
|
||||
"jb 3b \n" // translucentloop
|
||||
"jmp 2b \n" // opaqueloop
|
||||
|
||||
// last1
|
||||
"8: \n"
|
||||
"add $0x1,%2 \n" // 1 pixel left?
|
||||
"cmp $0x1,%2 \n"
|
||||
"jl 9f \n" // done
|
||||
"mov (%0),%3 \n"
|
||||
"movd %3,%%xmm0 \n"
|
||||
"mov (%0,%1,1),%3 \n"
|
||||
"movd %3,%%xmm1 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"pxor %%xmm4,%%xmm3 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"pmulhuw %%xmm3,%%xmm1 \n"
|
||||
"paddusw %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
|
||||
@ -477,7 +477,6 @@ __asm {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
__declspec(naked)
|
||||
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
@ -1965,40 +1964,42 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
mov edx, [esp + 4 + 8] // dst_argb
|
||||
mov ecx, [esp + 4 + 12] // width
|
||||
pcmpeqb xmm4, xmm4 // generate 0xffffffff for negative alpha
|
||||
pcmpeqb xmm5, xmm5 // generate 0xff000000 for alpha
|
||||
pslld xmm5, 24
|
||||
sub edx, esi
|
||||
mov eax, [esi] // get first pixel
|
||||
sub ecx, 1 // ensure there are at least 2 pixels
|
||||
je last1 // last pixel?
|
||||
jle last1 // last pixel?
|
||||
cmp eax, 0xFF000000 // opaque?
|
||||
jae opaqueloop
|
||||
cmp eax, 0x00FFFFFF // translucient?
|
||||
ja translucientloop
|
||||
cmp eax, 0x00FFFFFF // translucent?
|
||||
ja translucentloop
|
||||
|
||||
align 16
|
||||
transparentloop:
|
||||
sub ecx, 1
|
||||
lea esi, [esi + 4]
|
||||
je last1
|
||||
mov eax, [esi] // handle remaining pixel
|
||||
jle last1
|
||||
mov eax, [esi] // get next pixel
|
||||
cmp eax, 0x00FFFFFF // transparent?
|
||||
jbe transparentloop
|
||||
cmp eax, 0xFF000000 // translucient?
|
||||
jb translucientloop
|
||||
cmp eax, 0xFF000000 // translucent?
|
||||
jb translucentloop
|
||||
|
||||
align 16
|
||||
opaqueloop:
|
||||
mov dword ptr [esi + edx], eax
|
||||
lea esi, [esi + 4]
|
||||
sub ecx, 1
|
||||
je last1
|
||||
mov eax, [esi] // handle remaining pixel
|
||||
jle last1
|
||||
mov eax, [esi] // get next pixel
|
||||
cmp eax, 0xFF000000 // opaque?
|
||||
jae opaqueloop
|
||||
cmp eax, 0x00FFFFFF // transparent?
|
||||
jbe transparentloop
|
||||
|
||||
align 16
|
||||
translucientloop:
|
||||
translucentloop:
|
||||
movq xmm0, qword ptr [esi] // fetch 2 pixels
|
||||
movq xmm1, qword ptr [esi + edx]
|
||||
punpcklbw xmm0, xmm0 // src 16 bits
|
||||
@ -2009,39 +2010,42 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
pxor xmm3, xmm4
|
||||
pmulhuw xmm0, xmm2 // src * a
|
||||
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
|
||||
paddw xmm0, xmm1
|
||||
paddusw xmm0, xmm1
|
||||
psrlw xmm0, 8
|
||||
packuswb xmm0, xmm0 // pack 2 pixels
|
||||
por xmm0, xmm5 // set alpha
|
||||
movq qword ptr [esi + edx], xmm0
|
||||
lea esi, [esi + 8]
|
||||
sub ecx, 2
|
||||
jbe last1
|
||||
mov eax, [esi] // handle remaining pixel
|
||||
jle last1
|
||||
mov eax, [esi]
|
||||
cmp eax, 0x00FFFFFF // transparent?
|
||||
jbe transparentloop
|
||||
cmp eax, 0xFF000000 // translucient?
|
||||
jb translucientloop
|
||||
cmp eax, 0xFF000000 // translucent?
|
||||
jb translucentloop
|
||||
jmp opaqueloop
|
||||
|
||||
align 16
|
||||
last1:
|
||||
add ecx, 1
|
||||
je done
|
||||
cmp ecx, 1 // 1 left?
|
||||
jl done
|
||||
|
||||
mov eax, [esi] // get next pixel
|
||||
movd xmm0, eax
|
||||
mov eax, [esi + edx]
|
||||
movd xmm1, eax
|
||||
punpcklbw xmm0, xmm0 // src 16 bits
|
||||
punpcklbw xmm1, xmm1 // dst 16 bits
|
||||
pshuflw xmm2, xmm0, 0xff // src alpha
|
||||
pshufhw xmm2, xmm2, 0xff
|
||||
movdqa xmm3, xmm2 // dst alpha
|
||||
pxor xmm3, xmm4
|
||||
pmulhuw xmm0, xmm2 // src * a
|
||||
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
|
||||
paddw xmm0, xmm1
|
||||
paddusw xmm0, xmm1
|
||||
psrlw xmm0, 8
|
||||
packuswb xmm0, xmm0 // pack to bytes
|
||||
por xmm0, xmm5 // set alpha
|
||||
movd eax, xmm0
|
||||
mov dword ptr [esi + edx], eax
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user