mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
ARGBToRGB24 and ARGBToRAW optimized
BUG=none TEST=media_unittest Review URL: https://webrtc-codereview.appspot.com/348013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@140 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
8af21a57f5
commit
24d2656b65
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 137
|
||||
Version: 140
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -1653,7 +1653,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3_DISABLED)
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
@ -1709,7 +1709,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORAWROW_SSSE3_DISABLED)
|
||||
#if defined(HAS_ARGBTORAWROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
@ -1765,7 +1765,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB565ROW_SSE2_DISABLED)
|
||||
#if defined(HAS_ARGBTORGB565ROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
@ -1821,7 +1821,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTOARGB1555ROW_SSE2_DISABLED)
|
||||
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
@ -2195,7 +2195,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3_DISABLED)
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
@ -2225,7 +2225,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORAWROW_SSSE3_DISABLED)
|
||||
#if defined(HAS_ARGBTORAWROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
@ -2276,10 +2276,10 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB565ROW_SSE2_DISABLED)
|
||||
#if defined(HAS_ARGBTORGB565ROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
IS_ALIGNED(dst_rgb, 16) && IS_ALIGNED(dst_stride_rgb, 16)) {
|
||||
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
|
||||
@ -523,16 +523,16 @@ __asm {
|
||||
pshufb xmm2, xmm5
|
||||
pshufb xmm3, xmm5
|
||||
movdqa xmm4, xmm1
|
||||
psllq xmm4, 12
|
||||
pslldq xmm4, 12
|
||||
por xmm4, xmm0
|
||||
movdqa [edx], xmm4 // first 16 bytes
|
||||
movdqa xmm4, xmm2
|
||||
psrlq xmm1, 4
|
||||
psllq xmm4, 8
|
||||
psrldq xmm1, 4
|
||||
pslldq xmm4, 8
|
||||
por xmm1, xmm4
|
||||
movdqa [edx + 16], xmm1 // middle 16 bytes
|
||||
psrlq xmm2, 8
|
||||
psllq xmm3, 4
|
||||
psrldq xmm2, 8
|
||||
pslldq xmm3, 4
|
||||
por xmm2, xmm3
|
||||
movdqa [edx + 32], xmm2 // last 16 bytes
|
||||
lea edx, [edx + 48]
|
||||
@ -562,16 +562,16 @@ __asm {
|
||||
pshufb xmm2, xmm5
|
||||
pshufb xmm3, xmm5
|
||||
movdqa xmm4, xmm1
|
||||
psllq xmm4, 12
|
||||
pslldq xmm4, 12
|
||||
por xmm4, xmm0
|
||||
movdqa [edx], xmm4 // first 16 bytes
|
||||
movdqa xmm4, xmm2
|
||||
psrlq xmm1, 4
|
||||
psllq xmm4, 8
|
||||
psrldq xmm1, 4
|
||||
pslldq xmm4, 8
|
||||
por xmm1, xmm4
|
||||
movdqa [edx + 16], xmm1 // middle 16 bytes
|
||||
psrlq xmm2, 8
|
||||
psllq xmm3, 4
|
||||
psrldq xmm2, 8
|
||||
pslldq xmm3, 4
|
||||
por xmm2, xmm3
|
||||
movdqa [edx + 32], xmm2 // last 16 bytes
|
||||
lea edx, [edx + 48]
|
||||
@ -582,6 +582,7 @@ __asm {
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
// TODO(fbarchard): Improve sign extension/packing
|
||||
__declspec(naked)
|
||||
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
@ -591,7 +592,7 @@ __asm {
|
||||
psrlw xmm4, 10
|
||||
psllw xmm4, 5
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0xf800f800
|
||||
psrlw xmm5, 11
|
||||
psllw xmm5, 11
|
||||
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
@ -599,20 +600,20 @@ __asm {
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 4 pixels of argb
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0 // B
|
||||
psrlw xmm1, 3
|
||||
psrld xmm1, 3
|
||||
pand xmm1, xmm3
|
||||
movdqa xmm2, xmm0 // G
|
||||
psrlw xmm2, 5
|
||||
psrld xmm2, 5
|
||||
pand xmm2, xmm4
|
||||
por xmm1, xmm2
|
||||
psrlw xmm0, 8 // R
|
||||
psrld xmm0, 8 // R
|
||||
pand xmm0, xmm5
|
||||
por xmm0, xmm1
|
||||
pslld xmm0, 16
|
||||
psrad xmm0, 16
|
||||
packssdw xmm0, xmm0
|
||||
lea eax, [eax + 16]
|
||||
movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 4
|
||||
@ -622,6 +623,7 @@ __asm {
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
// TODO(fbarchard): Improve sign extension/packing
|
||||
__declspec(naked)
|
||||
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
@ -629,10 +631,10 @@ __asm {
|
||||
psrlw xmm3, 11
|
||||
movdqa xmm4, xmm3 // generate mask 0x03e003e0
|
||||
psllw xmm4, 5
|
||||
movdqa xmm5, xmm3 // generate mask 0x7c007c00
|
||||
psllw xmm5, 10
|
||||
movdqa xmm5, xmm4 // generate mask 0x7c007c00
|
||||
psllw xmm5, 5
|
||||
pcmpeqb xmm6, xmm6 // generate mask 0x80008000
|
||||
psrlw xmm6, 15
|
||||
psllw xmm6, 15
|
||||
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
@ -640,26 +642,25 @@ __asm {
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 4 pixels of argb
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0 // B
|
||||
psrlw xmm1, 3
|
||||
psrld xmm1, 3
|
||||
pand xmm1, xmm3
|
||||
movdqa xmm2, xmm0 // G
|
||||
psrlw xmm2, 6
|
||||
psrld xmm2, 6
|
||||
pand xmm2, xmm4
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm0 // R
|
||||
psrlw xmm2, 9
|
||||
psrld xmm2, 9
|
||||
pand xmm2, xmm5
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm0 // A
|
||||
psrlw xmm2, 16
|
||||
pand xmm2, xmm6
|
||||
por xmm1, xmm2
|
||||
psrld xmm0, 16 // A
|
||||
pand xmm0, xmm6
|
||||
por xmm0, xmm1
|
||||
pslld xmm0, 16
|
||||
psrad xmm0, 16
|
||||
packssdw xmm1, xmm1
|
||||
movq qword ptr [edx], xmm1 // store 4 pixels of ARGB1555
|
||||
packssdw xmm0, xmm0
|
||||
lea eax, [eax + 16]
|
||||
movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 4
|
||||
ja convertloop
|
||||
@ -682,7 +683,6 @@ __asm {
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 4 pixels of argb
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
pand xmm0, xmm3 // low nibble
|
||||
pand xmm1, xmm4 // high nibble
|
||||
@ -690,6 +690,7 @@ __asm {
|
||||
psrl xmm1, 8
|
||||
por xmm0, xmm1
|
||||
packuswb xmm0, xmm0
|
||||
lea eax, [eax + 16]
|
||||
movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 4
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user