FastConvertYUVToARGBRow_SSSE3 use 2 pack and then 2 stores, which works better on Core2

BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/323007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@103 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2011-12-13 02:49:22 +00:00
parent 19a248ab63
commit 3fe369661a
3 changed files with 5 additions and 5 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 102
Version: 103
License: BSD
License File: LICENSE

View File

@ -409,8 +409,8 @@ void OMITFP FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, // rdi
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"movdqa %%xmm0,(%3) \n"
"punpckhwd %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"

View File

@ -614,8 +614,8 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
punpcklbw xmm2, xmm5 // RA
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
movdqa [edx], xmm0
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
@ -803,10 +803,10 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
punpcklbw xmm0, xmm0 // GG
movdqa xmm1, xmm0
punpcklwd xmm0, xmm0 // BGRA first 4 pixels
por xmm0, xmm5
movdqa [edx], xmm0
punpckhwd xmm1, xmm1 // BGRA next 4 pixels
por xmm0, xmm5
por xmm1, xmm5
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]