mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
Macro to store ARGB value
BUG=396
TESTED=local windows build
R=harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/38109004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1279 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
5ab38f9258
commit
2f56d2859f
@ -1526,6 +1526,17 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
|||||||
"packuswb %%xmm1,%%xmm1 \n" \
|
"packuswb %%xmm1,%%xmm1 \n" \
|
||||||
"packuswb %%xmm2,%%xmm2 \n"
|
"packuswb %%xmm2,%%xmm2 \n"
|
||||||
|
|
||||||
|
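// Store 8 ARGB values. XMM5 supplies the alpha bytes that are interleaved with R.
// NOTE(review): the earlier wording "Assumes XMM5 is zero" would produce alpha 0 - confirm what callers preload into XMM5.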
|
||||||
|
#define STOREARGB \
|
||||||
|
"punpcklbw %%xmm1,%%xmm0 \n" \
|
||||||
|
"punpcklbw %%xmm5,%%xmm2 \n" \
|
||||||
|
"movdqa %%xmm0,%%xmm1 \n" \
|
||||||
|
"punpcklwd %%xmm2,%%xmm0 \n" \
|
||||||
|
"punpckhwd %%xmm2,%%xmm1 \n" \
|
||||||
|
"movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \
|
||||||
|
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n" \
|
||||||
|
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
||||||
|
|
||||||
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
const uint8* v_buf,
|
const uint8* v_buf,
|
||||||
@ -1538,14 +1549,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
READYUV444
|
READYUV444
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
STOREARGB
|
||||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm0,%%xmm1 \n"
|
|
||||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
|
||||||
"punpckhwd %%xmm2,%%xmm1 \n"
|
|
||||||
"movdqu %%xmm0," MEMACCESS([dst_argb]) " \n"
|
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n"
|
|
||||||
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
|
||||||
"sub $0x8,%[width] \n"
|
"sub $0x8,%[width] \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||||
@ -1660,14 +1664,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
STOREARGB
|
||||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm0,%%xmm1 \n"
|
|
||||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
|
||||||
"punpckhwd %%xmm2,%%xmm1 \n"
|
|
||||||
"movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
|
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
|
|
||||||
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
|
||||||
"sub $0x8,%[width] \n"
|
"sub $0x8,%[width] \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||||
@ -1693,14 +1690,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
READYUV411
|
READYUV411
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
STOREARGB
|
||||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm0,%%xmm1 \n"
|
|
||||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
|
||||||
"punpckhwd %%xmm2,%%xmm1 \n"
|
|
||||||
"movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
|
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
|
|
||||||
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
|
||||||
"sub $0x8,%[width] \n"
|
"sub $0x8,%[width] \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||||
@ -1724,14 +1714,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
READNV12
|
READNV12
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
STOREARGB
|
||||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm0,%%xmm1 \n"
|
|
||||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
|
||||||
"punpckhwd %%xmm2,%%xmm1 \n"
|
|
||||||
"movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
|
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
|
|
||||||
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
|
||||||
"sub $0x8,%[width] \n"
|
"sub $0x8,%[width] \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||||
@ -1754,14 +1737,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
READNV12
|
READNV12
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
STOREARGB
|
||||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
|
||||||
"movdqa %%xmm0,%%xmm1 \n"
|
|
||||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
|
||||||
"punpckhwd %%xmm2,%%xmm1 \n"
|
|
||||||
"movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
|
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
|
|
||||||
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
|
||||||
"sub $0x8,%[width] \n"
|
"sub $0x8,%[width] \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||||
|
|||||||
@ -316,7 +316,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
|
|||||||
por xmm3, xmm5
|
por xmm3, xmm5
|
||||||
movdqu [edx + 48], xmm3
|
movdqu [edx + 48], xmm3
|
||||||
lea edx, [edx + 64]
|
lea edx, [edx + 64]
|
||||||
sub ecx, 16
|
sub ecx, 16
|
||||||
jg convertloop
|
jg convertloop
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
@ -1772,6 +1772,19 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
|||||||
__asm packuswb xmm2, xmm2 /* R */ \
|
__asm packuswb xmm2, xmm2 /* R */ \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store 8 ARGB values.
|
||||||
|
#define STOREARGB __asm { \
|
||||||
|
/* Step 3: Weave into ARGB */ \
|
||||||
|
__asm punpcklbw xmm0, xmm1 /* BG */ \
|
||||||
|
__asm punpcklbw xmm2, xmm5 /* RA */ \
|
||||||
|
__asm movdqa xmm1, xmm0 \
|
||||||
|
__asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \
|
||||||
|
__asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
|
||||||
|
__asm movdqu [edx], xmm0 \
|
||||||
|
__asm movdqu [edx + 16], xmm1 \
|
||||||
|
__asm lea edx, [edx + 32] \
|
||||||
|
}
|
||||||
|
|
||||||
// 8 pixels.
|
// 8 pixels.
|
||||||
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
|
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
@ -1794,16 +1807,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
convertloop:
|
convertloop:
|
||||||
READYUV444
|
READYUV444
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
|
STOREARGB
|
||||||
// Step 3: Weave into ARGB
|
|
||||||
punpcklbw xmm0, xmm1 // BG
|
|
||||||
punpcklbw xmm2, xmm5 // RA
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
punpcklwd xmm0, xmm2 // ABGR first 4 pixels
|
|
||||||
punpckhwd xmm1, xmm2 // ABGR next 4 pixels
|
|
||||||
movdqu [edx], xmm0
|
|
||||||
movdqu [edx + 16], xmm1
|
|
||||||
lea edx, [edx + 32]
|
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
@ -1996,16 +2000,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
|
STOREARGB
|
||||||
// Step 3: Weave into ARGB
|
|
||||||
punpcklbw xmm0, xmm1 // BG
|
|
||||||
punpcklbw xmm2, xmm5 // RA
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
|
|
||||||
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
|
|
||||||
movdqu [edx], xmm0
|
|
||||||
movdqu [edx + 16], xmm1
|
|
||||||
lea edx, [edx + 32]
|
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
@ -2039,16 +2034,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
convertloop:
|
convertloop:
|
||||||
READYUV411 // modifies EBX
|
READYUV411 // modifies EBX
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
|
STOREARGB
|
||||||
// Step 3: Weave into ARGB
|
|
||||||
punpcklbw xmm0, xmm1 // BG
|
|
||||||
punpcklbw xmm2, xmm5 // RA
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
|
|
||||||
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
|
|
||||||
movdqu [edx], xmm0
|
|
||||||
movdqu [edx + 16], xmm1
|
|
||||||
lea edx, [edx + 32]
|
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
@ -2077,16 +2063,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
convertloop:
|
convertloop:
|
||||||
READNV12
|
READNV12
|
||||||
YUVTORGB(kYuvConstants)
|
YUVTORGB(kYuvConstants)
|
||||||
|
STOREARGB
|
||||||
// Step 3: Weave into ARGB
|
|
||||||
punpcklbw xmm0, xmm1 // BG
|
|
||||||
punpcklbw xmm2, xmm5 // RA
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
|
|
||||||
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
|
|
||||||
movdqu [edx], xmm0
|
|
||||||
movdqu [edx + 16], xmm1
|
|
||||||
lea edx, [edx + 32]
|
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
@ -2113,16 +2090,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
convertloop:
|
convertloop:
|
||||||
READNV12
|
READNV12
|
||||||
YUVTORGB(kYvuConstants)
|
YUVTORGB(kYvuConstants)
|
||||||
|
STOREARGB
|
||||||
// Step 3: Weave into ARGB
|
|
||||||
punpcklbw xmm0, xmm1 // BG
|
|
||||||
punpcklbw xmm2, xmm5 // RA
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
|
|
||||||
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
|
|
||||||
movdqu [edx], xmm0
|
|
||||||
movdqu [edx + 16], xmm1
|
|
||||||
lea edx, [edx + 32]
|
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user