alphablend test for opaque and transparent

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/436005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@205 16f28f9a-4ce2-e073-06de-1de4eb20be90

commit fe9f4ad808 (parent 9198f3754b)
@@ -162,12 +162,6 @@ int ARGBBlend(const uint8* src_argb, int src_stride_argb,
     ARGBBlendRow = ARGBBlendRow_SSE2;
   }
 #endif
-#if defined(HAS_ARGBBLENDROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 2)) {
-    ARGBBlendRow = ARGBBlendRow_SSSE3;
-  }
-#endif
 
   for (int y = 0; y < height; ++y) {
     ARGBBlendRow(src_argb, dst_argb, width);
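The context above is the tail of libyuv's row-function dispatch: one ARGBBlendRow implementation is chosen up front, then called once per row. The following is a minimal, self-contained sketch of that pattern, not the libyuv source; the SSE2 guard is assumed to mirror the removed SSSE3 guard (CPU flag plus even width), and BlendPlaneSketch/has_sse2 are hypothetical names.

    #include <stdint.h>

    // Hypothetical sketch of the dispatch pattern kept by this hunk.
    typedef void (*BlendRowFn)(const uint8_t* src_argb, uint8_t* dst_argb, int width);

    void ARGBBlendRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
    void ARGBBlendRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width);

    static void BlendPlaneSketch(const uint8_t* src_argb, int src_stride_argb,
                                 uint8_t* dst_argb, int dst_stride_argb,
                                 int width, int height, int has_sse2) {
      BlendRowFn ARGBBlendRow = ARGBBlendRow_C;   // portable fallback
      if (has_sse2 && (width % 2) == 0) {         // assumed guard, cf. IS_ALIGNED(width, 2)
        ARGBBlendRow = ARGBBlendRow_SSE2;         // 2-pixels-per-iteration path
      }
      for (int y = 0; y < height; ++y) {
        ARGBBlendRow(src_argb, dst_argb, width);  // blend one row onto dst
        src_argb += src_stride_argb;
        dst_argb += dst_stride_argb;
      }
    }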
@@ -996,6 +996,7 @@ void RotateUV270(const uint8* src, int src_stride,
                  width, height);
 }
 
+// Rotate 180 is a horizontal and vertical flip.
 void RotateUV180(const uint8* src, int src_stride,
                  uint8* dst_a, int dst_stride_a,
                  uint8* dst_b, int dst_stride_b,
@@ -65,7 +65,6 @@ extern "C" {
 #endif
 
 #if defined(_MSC_VER)
-#define HAS_ARGBBLENDROW_SSSE3
 #define HAS_ARGBBLENDROW_SSE2
 #endif
 
@@ -244,7 +243,6 @@ void YToARGBRow_SSE2(const uint8* y_buf,
                      uint8* rgb_buf,
                      int width);
 
-void ARGBBlendRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
 void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
 void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width);
 
@@ -514,76 +514,6 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
   }
 }
 
-#if 0
-void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
-  for (int x = 0; x < width - 1; x += 2) {
-    uint32 f = *(uint32*)src_argb;
-    uint32 a = f >> 24;
-    if (a) {
-      const uint32 b = *(uint32*)dst_argb;
-      if (a < 255) {
-        const uint32 src_rb = f & 0x00ff00ff;
-        const uint32 dst_rb = b & 0x00ff00ff;
-        const uint32 out_rb = (src_rb * a + dst_rb * (a ^ 0xff) + 0x00800080) &
-                              0xff00ff00;
-
-        const uint32 src_g = f & 0x0000ff00;
-        const uint32 dst_g = b & 0x0000ff00;
-        const uint32 out_g = ((src_g * a + dst_g * (a ^ 0xff) + 0x00008000) &
-                              0x00ff0000);
-
-        f = ((out_rb | out_g) >> 8) | 0xff000000;
-      }
-      *(uint32*)dst_argb = f;
-    }
-
-    f = *(uint32*)(src_argb + 4);
-    a = f >> 24;
-    if (a) {
-      const uint32 b = *(uint32*)(dst_argb + 4);
-      if (a < 255) {
-        const uint32 src_rb = f & 0x00ff00ff;
-        const uint32 dst_rb = b & 0x00ff00ff;
-        const uint32 out_rb = (src_rb * a + dst_rb * (a ^ 0xff) + 0x00800080) &
-                              0xff00ff00;
-
-        const uint32 src_g = f & 0x0000ff00;
-        const uint32 dst_g = b & 0x0000ff00;
-        const uint32 out_g = ((src_g * a + dst_g * (a ^ 0xff) + 0x00008000) &
-                              0x00ff0000);
-
-        f = ((out_rb | out_g) >> 8) | 0xff000000;
-      }
-      *(uint32*)(dst_argb + 4) = f;
-    }
-    src_argb += 8;
-    dst_argb += 8;
-  }
-
-  if (width & 1) {
-    uint32 f = *(uint32*)src_argb;
-    uint32 a = f >> 24;
-    if (a) {
-      const uint32 b = *(uint32*)dst_argb;
-      if (a < 255) {
-        const uint32 src_rb = f & 0x00ff00ff;
-        const uint32 dst_rb = b & 0x00ff00ff;
-        const uint32 out_rb = (src_rb * a + dst_rb * (a ^ 0xff) + 0x00800080) &
-                              0xff00ff00;
-
-        const uint32 src_g = f & 0x0000ff00;
-        const uint32 dst_g = b & 0x0000ff00;
-        const uint32 out_g = ((src_g * a + dst_g * (a ^ 0xff) + 0x00008000) &
-                              0x00ff0000);
-
-        f = ((out_rb | out_g) >> 8) | 0xff000000;
-      }
-      *(uint32*)dst_argb = f;
-    }
-  }
-}
-#endif
-
 // Wrappers to handle odd sizes/alignments
 #define MAKEYUVANY(NAMEANY, NAME, COPYROW) \
 void NAMEANY(const uint8* y_buf, \
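The block deleted here was a disabled (#if 0) scalar reference for ARGBBlendRow_C. Its arithmetic is a source-over blend, roughly out = (src * a + dst * (255 - a) + 128) >> 8 per channel, computed with red and blue packed into one 32-bit multiply and green handled separately, with the result alpha forced to 255. The helper below is a hypothetical standalone restatement of that per-pixel math (not part of libyuv) so the masks and rounding constants are easier to follow; like the original, it divides by 256 rather than 255.

    #include <stdint.h>

    // f is the source ARGB pixel, b the destination ARGB pixel (little endian).
    static uint32_t BlendPixelSketch(uint32_t f, uint32_t b) {
      uint32_t a = f >> 24;                      // source alpha
      if (a == 0) {
        return b;                                // transparent: keep destination
      }
      if (a < 255) {                             // translucent: blend channels
        const uint32_t src_rb = f & 0x00ff00ff;  // red and blue lanes
        const uint32_t dst_rb = b & 0x00ff00ff;
        const uint32_t out_rb =
            (src_rb * a + dst_rb * (a ^ 0xff) + 0x00800080) & 0xff00ff00;
        const uint32_t src_g = f & 0x0000ff00;   // green lane
        const uint32_t dst_g = b & 0x0000ff00;
        const uint32_t out_g =
            (src_g * a + dst_g * (a ^ 0xff) + 0x00008000) & 0x00ff0000;
        f = ((out_rb | out_g) >> 8) | 0xff000000;  // recombine, force alpha = 255
      }
      return f;                                  // opaque source or blended result
    }

The same three-way shape (alpha 0 skips the store, alpha 255 copies the source, anything else blends) is what the reworked SSE2 assembly later in this commit turns into explicit branch tests.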
@@ -1909,75 +1909,55 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
 }
 #endif  // HAS_YUY2TOYROW_SSE2
 
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for copying alpha
-static const uvec8 kShuffleAlpha = {
-  7u, 7u, 7u, 7u, 7u, 7u, 0x80, 0x80, 15u, 15u, 15u, 15u, 15u, 15u, 0x80, 0x80
-};
-
-__declspec(naked)
-void ARGBBlendRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
-  __asm {
-    mov        eax, 0x00200020  // rounding constant for 8.6 fixed point
-    movd       xmm3, eax
-    pshufd     xmm3, xmm3, 0
-    mov        eax, 0x3f3f3f3f  // mask for alpha
-    movd       xmm7, eax
-    pshufd     xmm7, xmm7, 0
-    movdqa     xmm4, kShuffleAlpha
-    pcmpeqb    xmm5, xmm5       // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    pcmpeqb    xmm6, xmm6       // generate 0x00010001 for negating
-    psrlw      xmm6, 15
-    mov        eax, [esp + 4]   // src_argb
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    sub        edx, eax
-
- convertloop:
-    movq       xmm0, qword ptr [eax]  // fetch 2 pixels
-    movq       xmm1, qword ptr [eax + edx]
-    punpcklbw  xmm1, xmm0       // mix 2 pixels aArRgGbB_aArRgGbB
-    movdqa     xmm2, xmm1       // alpha from byte 7 and 15
-    pshufb     xmm2, xmm4
-    pxor       xmm2, xmm5
-    psrlw      xmm2, 2
-    pand       xmm2, xmm7
-    paddw      xmm2, xmm6       // -a = (a^255)+1
-    pmaddubsw  xmm1, xmm2
-    paddw      xmm1, xmm3       // round
-    psrlw      xmm1, 6
-
-    packuswb   xmm1, xmm1       // pack 2 pixels
-    sub        ecx, 2
-    movq       qword ptr [eax + edx], xmm1
-    lea        eax, [eax + 8]
-    ja         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_ARGBBLENDROW_SSSE3
-
 #ifdef HAS_ARGBBLENDROW_SSE2
 // TODO(fbarchard): Single multiply method b+a(f-b)
 // TODO(fbarchard): Unroll and pair
-// TODO(fbarchard): Test for transparent and opaque common cases
+// TODO(fbarchard): Port to gcc
 __declspec(naked)
 void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
   __asm {
+    push       esi
+    mov        esi, [esp + 4 + 4]   // src_argb
+    mov        edx, [esp + 4 + 8]   // dst_argb
+    mov        ecx, [esp + 4 + 12]  // width
     pcmpeqb    xmm4, xmm4       // generate 0xffffffff do negative alpha
-    mov        eax, [esp + 4]   // src_argb
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    sub        edx, eax
+    sub        edx, esi
+    mov        eax, [esi]       // get first pixel
+    sub        ecx, 1           // ensure there are at least 2 pixels
+    je         last1            // last pixel?
+    cmp        eax, 0xFF000000  // opaque?
+    jae        opaqueloop
+    cmp        eax, 0x00FFFFFF  // translucient?
+    ja         translucientloop
+
+    align 16
+ transparentloop:
+    sub        ecx, 1
+    lea        esi, [esi + 4]
+    je         last1
+    mov        eax, [esi]       // handle remaining pixel
+    cmp        eax, 0x00FFFFFF  // transparent?
+    jbe        transparentloop
+    cmp        eax, 0xFF000000  // translucient?
+    jb         translucientloop
+
+    align 16
+ opaqueloop:
+    mov        dword ptr [esi + edx], eax
+    lea        esi, [esi + 4]
     sub        ecx, 1
     je         last1
+    mov        eax, [esi]       // handle remaining pixel
+    cmp        eax, 0xFF000000  // opaque?
+    jae        opaqueloop
+    cmp        eax, 0x00FFFFFF  // transparent?
+    jbe        transparentloop
 
- convertloop:
-    movq       xmm0, qword ptr [eax]  // fetch 2 pixels
-    movq       xmm1, qword ptr [eax + edx]
+    align 4
+ translucientloop:
+    movq       xmm0, qword ptr [esi]  // fetch 2 pixels
+    movq       xmm1, qword ptr [esi + edx]
     punpcklbw  xmm0, xmm0       // src 16 bits
    punpcklbw  xmm1, xmm1       // dst 16 bits
     pshuflw    xmm2, xmm0, 0xff // src alpha
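The rewritten prologue above classifies the leading pixel by comparing the whole 32-bit ARGB value in eax: alpha is the most significant byte of a little-endian ARGB dword, so any value at or above 0xFF000000 has alpha 255 and any value at or below 0x00FFFFFF has alpha 0. A scalar restatement of that test, written here only as an illustration (ClassifyArgb is not a libyuv function):

    #include <stdint.h>

    enum PixelClass { kTransparent, kTranslucent, kOpaque };

    // Mirrors the cmp/jae/jbe tests above on the packed ARGB word.
    static enum PixelClass ClassifyArgb(uint32_t argb) {
      if (argb >= 0xFF000000u) return kOpaque;       // alpha == 255: copy source
      if (argb <= 0x00FFFFFFu) return kTransparent;  // alpha == 0: keep destination
      return kTranslucent;                           // 0 < alpha < 255: full blend
    }

Each of the three loops handles a run of its own class (store, skip, or SIMD blend) and re-tests the next pixel to decide which loop to jump to; this is the opaque/transparent testing named in the commit title.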
@@ -1989,19 +1969,25 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
     paddw      xmm0, xmm1
     psrlw      xmm0, 8
     packuswb   xmm0, xmm0       // pack 2 pixels
+    movq       qword ptr [esi + edx], xmm0
+    lea        esi, [esi + 8]
     sub        ecx, 2
-    movq       qword ptr [eax + edx], xmm0
-    lea        eax, [eax + 8]
-    ja         convertloop
+    jbe        last1
+    mov        eax, [esi]       // handle remaining pixel
+    cmp        eax, 0x00FFFFFF  // transparent?
+    jbe        transparentloop
+    cmp        eax, 0xFF000000  // translucient?
+    jb         translucientloop
+    jmp        opaqueloop
 
+    align 4
  last1:
     add        ecx, 1
     je         done
 
-    mov        ecx, [eax]       // handle remaining pixel
-    movd       xmm0, ecx
-    mov        ecx, [eax + edx]
-    movd       xmm1, ecx
+    movd       xmm0, eax
+    mov        eax, [esi + edx]
+    movd       xmm1, eax
     punpcklbw  xmm0, xmm0       // src 16 bits
     punpcklbw  xmm1, xmm1       // dst 16 bits
     pshuflw    xmm2, xmm0, 0xff // src alpha
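For the translucient path, only part of the body is visible in these hunks, but the instructions and comments that are shown (punpcklbw to widen bytes, pshuflw ..., 0xff for the source alpha, and pmulhuw xmm1, xmm3 commented as dst * (a ^ 0xffff) in the next hunk) suggest the scalar model below for one channel. This is an illustration under those assumptions, not libyuv code. The useful identities are that a byte v interleaved with itself becomes the 16-bit value v * 257, and that (257 * a) ^ 0xFFFF equals 257 * (255 - a).

    #include <stdint.h>

    // Scalar model of one blended channel in the SSE2 translucient loop.
    static uint8_t BlendChannelModel(uint8_t src, uint8_t dst, uint8_t a) {
      uint16_t src16 = (uint16_t)(src * 257);     // punpcklbw xmm0, xmm0
      uint16_t dst16 = (uint16_t)(dst * 257);     // punpcklbw xmm1, xmm1
      uint16_t alpha16 = (uint16_t)(a * 257);     // pshuflw xmm2, xmm0, 0xff
      uint16_t inv_alpha16 = alpha16 ^ 0xFFFF;    // == 257 * (255 - a)
      uint16_t s = (uint16_t)(((uint32_t)src16 * alpha16) >> 16);      // pmulhuw
      uint16_t d = (uint16_t)(((uint32_t)dst16 * inv_alpha16) >> 16);  // pmulhuw
      return (uint8_t)((uint16_t)(s + d) >> 8);   // paddw, psrlw 8, packuswb
    }

For example, src = 255, dst = 0, a = 128 gives s = (65535 * 32896) >> 16 = 32895 and d = 0, so the result is 32895 >> 8 = 128, which matches the exact blend 255 * 128 / 255 = 128.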
@@ -2012,17 +1998,16 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
     pmulhuw    xmm1, xmm3       // dst * (a ^ 0xffff)
     paddw      xmm0, xmm1
     psrlw      xmm0, 8
-    packuswb   xmm0, xmm0       // pack 2 pixels
-    movd       ecx, xmm0
-    mov        dword ptr [eax + edx], ecx
+    packuswb   xmm0, xmm0       // pack to bytes
+    movd       eax, xmm0
+    mov        dword ptr [esi + edx], eax
 
  done:
+    pop        esi
     ret
   }
 }
-#endif  // HAS_ARGBBLENDROW_SSSE3
+#endif  // HAS_ARGBBLENDROW_SSE2
 
 #endif  // _M_IX86
 
@@ -1702,18 +1702,18 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
   intptr_t tmp_src_stride = static_cast<intptr_t>(src_stride);
   asm volatile (
     "pxor      %%xmm4,%%xmm4         \n"
-    "sub       $0x1,%6               \n"
+    "sub       $0x1,%5               \n"
   "1:                                \n"
     "movdqa    (%0),%%xmm0           \n"
     "mov       %0,%3                 \n"
-    "add       %4,%0                 \n"
+    "add       %6,%0                 \n"
     "movdqa    %%xmm0,%%xmm1         \n"
     "punpcklbw %%xmm4,%%xmm0         \n"
     "punpckhbw %%xmm4,%%xmm1         \n"
-    "mov       %6,%2                 \n"
+    "mov       %5,%2                 \n"
   "2:                                \n"
     "movdqa    (%0),%%xmm2           \n"
-    "add       %4,%0                 \n"
+    "add       %6,%0                 \n"
     "movdqa    %%xmm2,%%xmm3         \n"
     "punpcklbw %%xmm4,%%xmm2         \n"
     "punpckhbw %%xmm4,%%xmm3         \n"
@@ -1725,16 +1725,15 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
     "movdqa    %%xmm1,0x10(%1)       \n"
     "lea       0x10(%3),%0           \n"
     "lea       0x20(%1),%1           \n"
-    "sub       $0x10,%5              \n"
+    "sub       $0x10,%4              \n"
     "ja        1b                    \n"
   : "+r"(src_ptr),     // %0
     "+r"(dst_ptr),     // %1
     "+r"(tmp_height),  // %2
     "+r"(tmp_src),     // %3
-    "+rm"(tmp_src_stride),  // %4
-    "+rm"(src_width),  // %5
-    "+rm"(src_height)  // %6
-  :
+    "+rm"(src_width),  // %4
+    "+rm"(src_height)  // %5
+  : "+rm"(tmp_src_stride),  // %6
   : "memory", "cc"
 #if defined(__SSE2__)
     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
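The last two hunks renumber the %N references inside ScaleAddRows_SSE2's inline assembly because the operand list is reordered: src_width and src_height become %4 and %5, and tmp_src_stride moves to %6. In GCC extended asm the numbers are purely positional across the output list followed by the input list, so the body and the lists must be kept in sync by hand. A small standalone example of that numbering rule (a toy, unrelated to libyuv):

    #include <stdio.h>

    int main() {
      int a = 6, result;
      const int b = 7;
      asm volatile(
          "imull %2, %1        \n\t"  // %1 = a (in/out), %2 = b (input): a *= b
          "movl  %1, %0        \n\t"  // %0 = result (output): result = a
          : "=r"(result),  // %0
            "+r"(a)        // %1
          : "r"(b)         // %2
          : "cc");
      printf("%d\n", result);  // prints 42
      return 0;
    }

Reordering the operands in the lists without also updating %1 and %2 in the body would silently compute the wrong value, which is exactly the mismatch the renumbering above avoids.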