mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
address munge for rowreverse. And compute green mask based on red mask to save one shift.
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/363001 git-svn-id: http://libyuv.googlecode.com/svn/trunk@144 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
d1943b39e5
commit
0e6ce93c84
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 143
|
||||
Version: 144
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -655,14 +655,13 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
|
||||
intptr_t temp_width = static_cast<intptr_t>(width);
|
||||
asm volatile (
|
||||
"movdqa %3,%%xmm5 \n"
|
||||
"lea -0x10(%0,%2,1),%0 \n"
|
||||
"lea -0x10(%0),%0 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"lea -0x10(%0),%0 \n"
|
||||
"movdqa (%0,%2),%%xmm0 \n"
|
||||
"pshufb %%xmm5,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
@ -681,10 +680,9 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
|
||||
void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
|
||||
intptr_t temp_width = static_cast<intptr_t>(width);
|
||||
asm volatile (
|
||||
"lea -0x10(%0,%2,1),%0 \n"
|
||||
"lea -0x10(%0),%0 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"lea -0x10(%0),%0 \n"
|
||||
"movdqa (%0,%2),%%xmm0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"psllw $0x8,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
@ -692,9 +690,9 @@ void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
|
||||
"pshuflw $0x1b,%%xmm0,%%xmm0 \n"
|
||||
"pshufhw $0x1b,%%xmm0,%%xmm0 \n"
|
||||
"pshufd $0x4e,%%xmm0,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
|
||||
@ -238,120 +238,6 @@ __asm {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SHIFT565
|
||||
// Below shift/mask code is efficient and works, but more instructions than
|
||||
// pmul method
|
||||
// TODO(fbarchard): Port RGB565ToARGBRow_SSE2 to gcc
|
||||
// 29 instructions
|
||||
// Converts RGB565 pixels to 32-bit ARGB using shift/mask bit replication.
// Naked function: there is no prologue, so arguments are read straight from
// the stack:
//   [esp + 4]  src_rgb565 - source pixels, loaded 16 bytes per iteration
//   [esp + 8]  dst_argb   - destination, written 32 bytes per iteration
//   [esp + 12] pix        - pixel count, decremented by 8 per iteration
// Every load and store is movdqa, so both pointers must be 16-byte aligned.
// The loop body always runs at least once and stops once the count reaches
// zero or borrows below it, i.e. pix is effectively rounded up to 8.
__declspec(naked)
|
||||
void OldRGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
|
||||
int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_rgb565
|
||||
mov edx, [esp + 8] // dst_argb
|
||||
mov ecx, [esp + 12] // pix
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0xff000000 for Alpha
|
||||
// NOTE(review): this shift is per dword, so only every other 16-bit lane gets
// 0xff in its high byte. The AG register is byte-interleaved per 16-bit lane
// by punpcklbw/punpckhbw below, which looks like it leaves alpha 0 on even
// pixels - confirm (OldARGB1555ToARGBRow_SSE2 uses psllw 8 for its mask).
pslld xmm5, 24
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0xf800f800 for Red
|
||||
psllw xmm4, 11
|
||||
pcmpeqb xmm6, xmm6 // generate mask 0x001f001f for Blue
|
||||
psrlw xmm6, 11
|
||||
pcmpeqb xmm7, xmm7 // generate mask 0x00fc00fc for Green
|
||||
psrlw xmm7, 10
|
||||
psllw xmm7, 2
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 8 pixels of rgb565 (16 bytes)
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm2, xmm0
|
||||
pand xmm1, xmm4 // R in upper 5 bits
|
||||
psrlw xmm2, 13 // R 3 bits
|
||||
psllw xmm2, 8 // place them in the low 3 bits of the high byte
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm0
|
||||
pand xmm2, xmm6 // mask B 5 bits
|
||||
movdqa xmm3, xmm2
|
||||
psllw xmm2, 3 // B in upper 5 bits of lower byte
|
||||
psrlw xmm3, 2 // top 3 bits of B replicated into the low bits
|
||||
por xmm2, xmm3
|
||||
por xmm1, xmm2 // RB
|
||||
psrlw xmm0, 3 // G in top 6 bits of lower byte
|
||||
pand xmm0, xmm7 // mask G 6 bits
|
||||
movdqa xmm2, xmm0
|
||||
psrlw xmm2, 6 // top 2 bits of G replicated into the low bits
|
||||
por xmm0, xmm2
|
||||
por xmm0, xmm5 // AG
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm1, xmm0 // interleave RB/AG bytes of the low 4 pixels
|
||||
punpckhbw xmm2, xmm0 // interleave RB/AG bytes of the high 4 pixels
|
||||
movdqa [edx], xmm1 // store 4 pixels of ARGB
|
||||
movdqa [edx + 16], xmm2 // store next 4 pixels of ARGB
|
||||
lea edx, [edx + 32]
|
||||
sub ecx, 8 // 8 pixels consumed
|
||||
ja convertloop // loop while the remaining count is above zero (unsigned)
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port ARGB1555ToARGBRow_SSE2 to gcc
|
||||
// 33 instructions
|
||||
// Converts 16-bit ARGB1555 pixels to 32-bit ARGB using shift/mask bit
// replication. The code treats bit 15 as alpha and bits 14-10, 9-5 and 4-0
// as the 5-bit R, G and B fields.
// Naked function: there is no prologue, so arguments are read straight from
// the stack:
//   [esp + 4]  src_argb1555 - source pixels, loaded 16 bytes per iteration
//   [esp + 8]  dst_argb     - destination, written 32 bytes per iteration
//   [esp + 12] pix          - pixel count, decremented by 8 per iteration
// Every load and store is movdqa, so both pointers must be 16-byte aligned.
// The loop body always runs at least once and stops once the count reaches
// zero or borrows below it, i.e. pix is effectively rounded up to 8.
__declspec(naked)
|
||||
void OldARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
|
||||
int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_argb1555
|
||||
mov edx, [esp + 8] // dst_argb
|
||||
mov ecx, [esp + 12] // pix
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 for Alpha
|
||||
psllw xmm5, 8
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0xf800f800 for Red
|
||||
psllw xmm4, 11
|
||||
pcmpeqb xmm6, xmm6 // generate mask 0x001f001f for Blue
|
||||
psrlw xmm6, 11
|
||||
pcmpeqb xmm7, xmm7 // generate mask 0x00f800f8 for Green
|
||||
psrlw xmm7, 11
|
||||
psllw xmm7, 3
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 8 pixels of argb1555 (16 bytes)
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
psllw xmm1, 1 // drop the alpha bit so R starts at bit 15
|
||||
movdqa xmm2, xmm0
|
||||
pand xmm1, xmm4 // R in upper 5 bits
|
||||
// NOTE(review): xmm2 is the unshifted pixel here, so this shift yields bits
// 15-13 (alpha plus the top two R bits) rather than the top three R bits -
// looks like it should start from the copy shifted left by 1; confirm.
psrlw xmm2, 13 // R 3 bits
|
||||
psllw xmm2, 8 // place them in the low 3 bits of the high byte
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm0
|
||||
pand xmm2, xmm6 // mask B 5 bits
|
||||
movdqa xmm3, xmm2
|
||||
psllw xmm2, 3 // B in upper 5 bits of lower byte
|
||||
psrlw xmm3, 2 // top 3 bits of B replicated into the low bits
|
||||
por xmm2, xmm3
|
||||
por xmm1, xmm2 // RB
|
||||
movdqa xmm2, xmm0
|
||||
psrlw xmm2, 2 // G in top 5 bits of lower byte
|
||||
pand xmm2, xmm7 // mask G 5 bits
|
||||
movdqa xmm3, xmm2
|
||||
psrlw xmm3, 5 // top 3 bits of G replicated into the low bits
|
||||
por xmm2, xmm3
|
||||
psraw xmm0, 8 // A: arithmetic shift smears bit 15 across the high byte
|
||||
pand xmm0, xmm5 // keep only the high (alpha) byte
|
||||
por xmm0, xmm2 // AG
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm1, xmm0 // interleave RB/AG bytes of the low 4 pixels
|
||||
punpckhbw xmm2, xmm0 // interleave RB/AG bytes of the high 4 pixels
|
||||
movdqa [edx], xmm1 // store 4 pixels of ARGB
|
||||
movdqa [edx + 16], xmm2 // store next 4 pixels of ARGB
|
||||
lea edx, [edx + 32]
|
||||
sub ecx, 8 // 8 pixels consumed
|
||||
ja convertloop // loop while the remaining count is above zero (unsigned)
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// pmul method to replicate bits
|
||||
// Math to replicate bits
|
||||
// (v << 8) | (v << 3)
|
||||
@ -422,8 +308,7 @@ __asm {
|
||||
pshufd xmm6, xmm6, 0
|
||||
pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
|
||||
psllw xmm3, 11
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0x03e003e0 for Green
|
||||
psllw xmm4, 11
|
||||
movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green
|
||||
psrlw xmm4, 6
|
||||
pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
|
||||
psllw xmm7, 8
|
||||
@ -1305,14 +1190,13 @@ __asm {
|
||||
mov edx, [esp + 8] // dst
|
||||
mov ecx, [esp + 12] // width
|
||||
movdqa xmm5, kShuffleReverse
|
||||
lea eax, [eax + ecx - 16]
|
||||
convertloop:
|
||||
movdqa xmm0, [eax]
|
||||
lea eax, [eax - 16]
|
||||
convertloop:
|
||||
movdqa xmm0, [eax + ecx]
|
||||
pshufb xmm0, xmm5
|
||||
sub ecx, 16
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
@ -1327,10 +1211,9 @@ __asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov edx, [esp + 8] // dst
|
||||
mov ecx, [esp + 12] // width
|
||||
lea eax, [eax + ecx - 16]
|
||||
convertloop:
|
||||
movdqa xmm0, [eax]
|
||||
lea eax, [eax - 16]
|
||||
convertloop:
|
||||
movdqa xmm0, [eax + ecx]
|
||||
movdqa xmm1, xmm0 // swap bytes
|
||||
psllw xmm0, 8
|
||||
psrlw xmm1, 8
|
||||
@ -1338,9 +1221,9 @@ __asm {
|
||||
pshuflw xmm0, xmm0, 0x1b // swap words
|
||||
pshufhw xmm0, xmm0, 0x1b
|
||||
pshufd xmm0, xmm0, 0x4e // swap qwords
|
||||
sub ecx, 16
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user