mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
I400ToARGB: use 8.8 fixed point to avoid a shift. The GCC version now generates its constants in registers instead of loading them from memory, to avoid an -fPIC performance stall.
BUG=none TEST=none Review URL: http://webrtc-codereview.appspot.com/322013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@106 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
9cece4b198
commit
8b9759c4a7
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 104
|
||||
Version: 106
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -558,47 +558,49 @@ void OMITFP FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf, // rdi
|
||||
#endif
|
||||
|
||||
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
|
||||
|
||||
void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi
|
||||
uint8* rgb_buf, // rcx
|
||||
int width) { // r8
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"pslld $0x18,%%xmm5 \n"
|
||||
"pxor %%xmm4,%%xmm4 \n"
|
||||
"movdqa %3,%%xmm3 \n"
|
||||
"movdqa %4,%%xmm2 \n"
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||
"pslld $0x18,%%xmm4 \n"
|
||||
"mov $0x10001000,%%eax \n"
|
||||
"movd %%eax,%%xmm3 \n"
|
||||
"pshufd $0x0,%%xmm3,%%xmm3 \n"
|
||||
"mov $0x012a012a,%%eax \n"
|
||||
"movd %%eax,%%xmm2 \n"
|
||||
"pshufd $0x0,%%xmm2,%%xmm2 \n"
|
||||
|
||||
"1: \n"
|
||||
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"punpcklbw %%xmm4,%%xmm0 \n"
|
||||
"psubsw %%xmm3,%%xmm0 \n"
|
||||
"pmullw %%xmm2,%%xmm0 \n"
|
||||
"psraw $0x6,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"psubusw %%xmm3,%%xmm0 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
|
||||
// Step 2: Weave into ARGB
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklwd %%xmm0,%%xmm0 \n"
|
||||
"por %%xmm5,%%xmm0 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"punpckhwd %%xmm1,%%xmm1 \n"
|
||||
"por %%xmm5,%%xmm1 \n"
|
||||
"movdqa %%xmm1,16(%1) \n"
|
||||
"lea 32(%1),%1 \n"
|
||||
// Step 2: Weave into ARGB
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklwd %%xmm0,%%xmm0 \n"
|
||||
"punpckhwd %%xmm1,%%xmm1 \n"
|
||||
"por %%xmm4,%%xmm0 \n"
|
||||
"por %%xmm4,%%xmm1 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm1,16(%1) \n"
|
||||
"lea 32(%1),%1 \n"
|
||||
|
||||
"sub $0x8,%2 \n"
|
||||
"ja 1b \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(y_buf), // %0
|
||||
"+r"(rgb_buf), // %1
|
||||
"+rm"(width) // %2
|
||||
: "m"(kYuvConstants.kYSub16), // %3
|
||||
"m"(kYuvConstants.kYToRgb) // %4
|
||||
: "memory", "cc"
|
||||
:
|
||||
: "memory", "cc", "eax"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
@ -780,23 +780,25 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0xff000000
|
||||
pslld xmm4, 24
|
||||
mov eax,0x10001000
|
||||
movd xmm3,eax
|
||||
pshufd xmm3,xmm3,0
|
||||
mov eax,0x012a012a
|
||||
movd xmm2,eax
|
||||
pshufd xmm2,xmm2,0
|
||||
mov eax, [esp + 4] // Y
|
||||
mov edx, [esp + 8] // rgb
|
||||
mov ecx, [esp + 12] // width
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
|
||||
pslld xmm5, 24
|
||||
pxor xmm4, xmm4
|
||||
movdqa xmm3, kYSub16
|
||||
movdqa xmm2, kYToRgb
|
||||
|
||||
convertloop:
|
||||
// Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
|
||||
movq xmm0, qword ptr [eax]
|
||||
movq xmm0, [eax]
|
||||
lea eax, [eax + 8]
|
||||
punpcklbw xmm0, xmm4
|
||||
psubsw xmm0, xmm3
|
||||
pmullw xmm0, xmm2
|
||||
psraw xmm0, 6
|
||||
punpcklbw xmm0, xmm0 // Y.Y
|
||||
psubusw xmm0, xmm3
|
||||
pmulhuw xmm0, xmm2
|
||||
packuswb xmm0, xmm0 // G
|
||||
|
||||
// Step 2: Weave into ARGB
|
||||
@ -804,8 +806,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
||||
movdqa xmm1, xmm0
|
||||
punpcklwd xmm0, xmm0 // BGRA first 4 pixels
|
||||
punpckhwd xmm1, xmm1 // BGRA next 4 pixels
|
||||
por xmm0, xmm5
|
||||
por xmm1, xmm5
|
||||
por xmm0, xmm4
|
||||
por xmm1, xmm4
|
||||
movdqa [edx], xmm0
|
||||
movdqa [edx + 16], xmm1
|
||||
lea edx, [edx + 32]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user