mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
Add a macro for YUV to RGB on Windows. Allows multiple color matrix structures in the future.
BUG=393
TESTED=local build
R=brucedawson@google.com, harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/38079004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1275 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
97a3850ea4
commit
3bb829a44f
@ -1499,23 +1499,23 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
|||||||
"punpcklwd %%xmm0,%%xmm0 \n"
|
"punpcklwd %%xmm0,%%xmm0 \n"
|
||||||
|
|
||||||
// Convert 8 pixels: 8 UV and 8 Y
|
// Convert 8 pixels: 8 UV and 8 Y
|
||||||
#define YUVTORGB(kYuvConstants) \
|
#define YUVTORGB(YuvConstants) \
|
||||||
"movdqa %%xmm0,%%xmm1 \n" \
|
"movdqa %%xmm0,%%xmm1 \n" \
|
||||||
"movdqa %%xmm0,%%xmm2 \n" \
|
"movdqa %%xmm0,%%xmm2 \n" \
|
||||||
"movdqa %%xmm0,%%xmm3 \n" \
|
"movdqa %%xmm0,%%xmm3 \n" \
|
||||||
"movdqa " MEMACCESS2(96, [kYuvConstants]) ",%%xmm0 \n" \
|
"movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
|
||||||
"pmaddubsw " MEMACCESS([kYuvConstants]) ",%%xmm1 \n" \
|
"pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
|
||||||
"psubw %%xmm1,%%xmm0 \n" \
|
"psubw %%xmm1,%%xmm0 \n" \
|
||||||
"movdqa " MEMACCESS2(128, [kYuvConstants]) ",%%xmm1 \n" \
|
"movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
|
||||||
"pmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2 \n" \
|
"pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
|
||||||
"psubw %%xmm2,%%xmm1 \n" \
|
"psubw %%xmm2,%%xmm1 \n" \
|
||||||
"movdqa " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \
|
"movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
|
||||||
"pmaddubsw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm3 \n" \
|
"pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
|
||||||
"psubw %%xmm3,%%xmm2 \n" \
|
"psubw %%xmm3,%%xmm2 \n" \
|
||||||
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
|
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
|
||||||
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
|
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
|
||||||
"punpcklbw %%xmm3,%%xmm3 \n" \
|
"punpcklbw %%xmm3,%%xmm3 \n" \
|
||||||
"pmulhuw " MEMACCESS2(192, [kYuvConstants]) ",%%xmm3 \n" \
|
"pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
|
||||||
"paddsw %%xmm3,%%xmm0 \n" \
|
"paddsw %%xmm3,%%xmm0 \n" \
|
||||||
"paddsw %%xmm3,%%xmm1 \n" \
|
"paddsw %%xmm3,%%xmm1 \n" \
|
||||||
"paddsw %%xmm3,%%xmm2 \n" \
|
"paddsw %%xmm3,%%xmm2 \n" \
|
||||||
@ -1887,21 +1887,21 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
|||||||
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n"
|
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n"
|
||||||
|
|
||||||
// Convert 16 pixels: 16 UV and 16 Y.
|
// Convert 16 pixels: 16 UV and 16 Y.
|
||||||
#define YUVTORGB_AVX2(kYuvConstants) \
|
#define YUVTORGB_AVX2(YuvConstants) \
|
||||||
"vpmaddubsw " MEMACCESS2(64, [kYuvConstants]) ",%%ymm0,%%ymm2 \n" \
|
"vpmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%ymm0,%%ymm2 \n" \
|
||||||
"vpmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%ymm0,%%ymm1 \n" \
|
"vpmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%ymm0,%%ymm1 \n" \
|
||||||
"vpmaddubsw " MEMACCESS([kYuvConstants]) ",%%ymm0,%%ymm0 \n" \
|
"vpmaddubsw " MEMACCESS([YuvConstants]) ",%%ymm0,%%ymm0 \n" \
|
||||||
"vmovdqu " MEMACCESS2(160, [kYuvConstants]) ",%%ymm3 \n" \
|
"vmovdqu " MEMACCESS2(160, [YuvConstants]) ",%%ymm3 \n" \
|
||||||
"vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
|
"vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
|
||||||
"vmovdqu " MEMACCESS2(128, [kYuvConstants]) ",%%ymm2 \n" \
|
"vmovdqu " MEMACCESS2(128, [YuvConstants]) ",%%ymm2 \n" \
|
||||||
"vpsubw %%ymm1,%%ymm2,%%ymm1 \n" \
|
"vpsubw %%ymm1,%%ymm2,%%ymm1 \n" \
|
||||||
"vmovdqu " MEMACCESS2(96, [kYuvConstants]) ",%%ymm1 \n" \
|
"vmovdqu " MEMACCESS2(96, [YuvConstants]) ",%%ymm1 \n" \
|
||||||
"vpsubw %%ymm0,%%ymm1,%%ymm0 \n" \
|
"vpsubw %%ymm0,%%ymm1,%%ymm0 \n" \
|
||||||
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \
|
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \
|
||||||
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
|
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
|
||||||
"vpermq $0xd8,%%ymm3,%%ymm3 \n" \
|
"vpermq $0xd8,%%ymm3,%%ymm3 \n" \
|
||||||
"vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \
|
"vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||||
"vpmulhuw " MEMACCESS2(192, [kYuvConstants]) ",%%ymm3,%%ymm3 \n" \
|
"vpmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%ymm3,%%ymm3 \n" \
|
||||||
"vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \
|
"vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \
|
||||||
"vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \
|
"vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \
|
||||||
"vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \
|
"vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \
|
||||||
|
|||||||
@ -1472,30 +1472,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
|||||||
#endif // HAS_ARGBTOYROW_SSSE3
|
#endif // HAS_ARGBTOYROW_SSSE3
|
||||||
|
|
||||||
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
||||||
static const lvec8 kUVToB_AVX = {
|
|
||||||
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
struct YuvConstants {
|
||||||
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0
|
lvec8 kUVToB; // 0
|
||||||
|
lvec8 kUVToG; // 32
|
||||||
|
lvec8 kUVToR; // 64
|
||||||
|
lvec16 kUVBiasB; // 96
|
||||||
|
lvec16 kUVBiasG; // 128
|
||||||
|
lvec16 kUVBiasR; // 160
|
||||||
|
lvec16 kYToRgb; // 192
|
||||||
};
|
};
|
||||||
static const lvec8 kUVToR_AVX = {
|
|
||||||
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
|
// BT601 constants for YUV to RGB.
|
||||||
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR
|
static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||||
|
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||||
|
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
|
||||||
|
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
|
||||||
|
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
|
||||||
|
{ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
|
||||||
|
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
|
||||||
|
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
|
||||||
|
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
|
||||||
|
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
|
||||||
|
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
|
||||||
};
|
};
|
||||||
static const lvec8 kUVToG_AVX = {
|
|
||||||
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
|
// BT601 constants for NV21 where chroma plane is VU instead of UV.
|
||||||
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
|
static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
||||||
};
|
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
|
||||||
static const lvec16 kYToRgb_AVX = {
|
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
|
||||||
YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
|
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
|
||||||
};
|
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
|
||||||
static const lvec16 kUVBiasB_AVX = {
|
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
|
||||||
BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
|
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
|
||||||
};
|
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
|
||||||
static const lvec16 kUVBiasG_AVX = {
|
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
|
||||||
BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG
|
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
|
||||||
};
|
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
|
||||||
static const lvec16 kUVBiasR_AVX = {
|
|
||||||
BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
#endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
|
||||||
|
|
||||||
// Read 8 UV from 422, upsample to 16 UV.
|
// Read 8 UV from 422, upsample to 16 UV.
|
||||||
@ -1509,23 +1524,23 @@ static const lvec16 kUVBiasR_AVX = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Convert 16 pixels: 16 UV and 16 Y.
|
// Convert 16 pixels: 16 UV and 16 Y.
|
||||||
#define YUVTORGB_AVX2 __asm { \
|
#define YUVTORGB_AVX2(YuvConstants) __asm { \
|
||||||
/* Step 1: Find 8 UV contributions to 16 R,G,B values */ \
|
/* Step 1: Find 8 UV contributions to 16 R,G,B values */ \
|
||||||
__asm vpmaddubsw ymm2, ymm0, kUVToR_AVX /* scale R UV */ \
|
__asm vpmaddubsw ymm2, ymm0, YuvConstants.kUVToR /* scale R UV */ \
|
||||||
__asm vpmaddubsw ymm1, ymm0, kUVToG_AVX /* scale G UV */ \
|
__asm vpmaddubsw ymm1, ymm0, YuvConstants.kUVToG /* scale G UV */ \
|
||||||
__asm vpmaddubsw ymm0, ymm0, kUVToB_AVX /* scale B UV */ \
|
__asm vpmaddubsw ymm0, ymm0, YuvConstants.kUVToB /* scale B UV */ \
|
||||||
__asm vmovdqu ymm3, kUVBiasR_AVX \
|
__asm vmovdqu ymm3, YuvConstants.kUVBiasR \
|
||||||
__asm vpsubw ymm2, ymm3, ymm2 \
|
__asm vpsubw ymm2, ymm3, ymm2 \
|
||||||
__asm vmovdqu ymm3, kUVBiasG_AVX \
|
__asm vmovdqu ymm3, YuvConstants.kUVBiasG \
|
||||||
__asm vpsubw ymm1, ymm3, ymm1 \
|
__asm vpsubw ymm1, ymm3, ymm1 \
|
||||||
__asm vmovdqu ymm3, kUVBiasB_AVX \
|
__asm vmovdqu ymm3, YuvConstants.kUVBiasB \
|
||||||
__asm vpsubw ymm0, ymm3, ymm0 \
|
__asm vpsubw ymm0, ymm3, ymm0 \
|
||||||
/* Step 2: Find Y contribution to 16 R,G,B values */ \
|
/* Step 2: Find Y contribution to 16 R,G,B values */ \
|
||||||
__asm vmovdqu xmm3, [eax] /* NOLINT */ \
|
__asm vmovdqu xmm3, [eax] /* NOLINT */ \
|
||||||
__asm lea eax, [eax + 16] \
|
__asm lea eax, [eax + 16] \
|
||||||
__asm vpermq ymm3, ymm3, 0xd8 \
|
__asm vpermq ymm3, ymm3, 0xd8 \
|
||||||
__asm vpunpcklbw ymm3, ymm3, ymm3 \
|
__asm vpunpcklbw ymm3, ymm3, ymm3 \
|
||||||
__asm vpmulhuw ymm3, ymm3, kYToRgb_AVX \
|
__asm vpmulhuw ymm3, ymm3, YuvConstants.kYToRgb \
|
||||||
__asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
|
__asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
|
||||||
__asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
|
__asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
|
||||||
__asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
|
__asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
|
||||||
@ -1559,7 +1574,7 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422_AVX2
|
READYUV422_AVX2
|
||||||
YUVTORGB_AVX2
|
YUVTORGB_AVX2(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
vpunpcklbw ymm0, ymm0, ymm1 // BG
|
vpunpcklbw ymm0, ymm0, ymm1 // BG
|
||||||
@ -1605,7 +1620,7 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422_AVX2
|
READYUV422_AVX2
|
||||||
YUVTORGB_AVX2
|
YUVTORGB_AVX2(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into BGRA
|
// Step 3: Weave into BGRA
|
||||||
vpunpcklbw ymm1, ymm1, ymm0 // GB
|
vpunpcklbw ymm1, ymm1, ymm0 // GB
|
||||||
@ -1651,7 +1666,7 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422_AVX2
|
READYUV422_AVX2
|
||||||
YUVTORGB_AVX2
|
YUVTORGB_AVX2(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into RGBA
|
// Step 3: Weave into RGBA
|
||||||
vpunpcklbw ymm1, ymm1, ymm2 // GR
|
vpunpcklbw ymm1, ymm1, ymm2 // GR
|
||||||
@ -1697,7 +1712,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422_AVX2
|
READYUV422_AVX2
|
||||||
YUVTORGB_AVX2
|
YUVTORGB_AVX2(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ABGR
|
// Step 3: Weave into ABGR
|
||||||
vpunpcklbw ymm1, ymm2, ymm1 // RG
|
vpunpcklbw ymm1, ymm2, ymm1 // RG
|
||||||
@ -1760,56 +1775,25 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Convert 8 pixels: 8 UV and 8 Y.
|
// Convert 8 pixels: 8 UV and 8 Y.
|
||||||
#define YUVTORGB __asm { \
|
#define YUVTORGB(YuvConstants) __asm { \
|
||||||
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
|
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
|
||||||
__asm movdqa xmm1, xmm0 \
|
__asm movdqa xmm1, xmm0 \
|
||||||
__asm movdqa xmm2, xmm0 \
|
__asm movdqa xmm2, xmm0 \
|
||||||
__asm movdqa xmm3, xmm0 \
|
__asm movdqa xmm3, xmm0 \
|
||||||
__asm movdqa xmm0, kUVBiasB /* unbias back to signed */ \
|
__asm movdqa xmm0, YuvConstants.kUVBiasB /* unbias back to signed */ \
|
||||||
__asm pmaddubsw xmm1, kUVToB /* scale B UV */ \
|
__asm pmaddubsw xmm1, YuvConstants.kUVToB /* scale B UV */ \
|
||||||
__asm psubw xmm0, xmm1 \
|
__asm psubw xmm0, xmm1 \
|
||||||
__asm movdqa xmm1, kUVBiasG \
|
__asm movdqa xmm1, YuvConstants.kUVBiasG \
|
||||||
__asm pmaddubsw xmm2, kUVToG /* scale G UV */ \
|
__asm pmaddubsw xmm2, YuvConstants.kUVToG /* scale G UV */ \
|
||||||
__asm psubw xmm1, xmm2 \
|
__asm psubw xmm1, xmm2 \
|
||||||
__asm movdqa xmm2, kUVBiasR \
|
__asm movdqa xmm2, YuvConstants.kUVBiasR \
|
||||||
__asm pmaddubsw xmm3, kUVToR /* scale R UV */ \
|
__asm pmaddubsw xmm3, YuvConstants.kUVToR /* scale R UV */ \
|
||||||
__asm psubw xmm2, xmm3 \
|
__asm psubw xmm2, xmm3 \
|
||||||
/* Step 2: Find Y contribution to 8 R,G,B values */ \
|
/* Step 2: Find Y contribution to 8 R,G,B values */ \
|
||||||
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
|
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
|
||||||
__asm lea eax, [eax + 8] \
|
__asm lea eax, [eax + 8] \
|
||||||
__asm punpcklbw xmm3, xmm3 \
|
__asm punpcklbw xmm3, xmm3 \
|
||||||
__asm pmulhuw xmm3, kYToRgb \
|
__asm pmulhuw xmm3, YuvConstants.kYToRgb \
|
||||||
__asm paddsw xmm0, xmm3 /* B += Y */ \
|
|
||||||
__asm paddsw xmm1, xmm3 /* G += Y */ \
|
|
||||||
__asm paddsw xmm2, xmm3 /* R += Y */ \
|
|
||||||
__asm psraw xmm0, 6 \
|
|
||||||
__asm psraw xmm1, 6 \
|
|
||||||
__asm psraw xmm2, 6 \
|
|
||||||
__asm packuswb xmm0, xmm0 /* B */ \
|
|
||||||
__asm packuswb xmm1, xmm1 /* G */ \
|
|
||||||
__asm packuswb xmm2, xmm2 /* R */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert 8 pixels: 8 VU and 8 Y.
|
|
||||||
#define YVUTORGB __asm { \
|
|
||||||
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
|
|
||||||
__asm movdqa xmm1, xmm0 \
|
|
||||||
__asm movdqa xmm2, xmm0 \
|
|
||||||
__asm movdqa xmm3, xmm0 \
|
|
||||||
__asm movdqa xmm0, kUVBiasB /* unbias back to signed */ \
|
|
||||||
__asm pmaddubsw xmm1, kVUToB /* scale B UV */ \
|
|
||||||
__asm psubw xmm0, xmm1 \
|
|
||||||
__asm movdqa xmm1, kUVBiasG \
|
|
||||||
__asm pmaddubsw xmm2, kVUToG /* scale G UV */ \
|
|
||||||
__asm psubw xmm1, xmm2 \
|
|
||||||
__asm movdqa xmm2, kUVBiasR \
|
|
||||||
__asm pmaddubsw xmm3, kVUToR /* scale R UV */ \
|
|
||||||
__asm psubw xmm2, xmm3 \
|
|
||||||
/* Step 2: Find Y contribution to 8 R,G,B values */ \
|
|
||||||
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
|
|
||||||
__asm lea eax, [eax + 8] \
|
|
||||||
__asm punpcklbw xmm3, xmm3 \
|
|
||||||
__asm pmulhuw xmm3, kYToRgb \
|
|
||||||
__asm paddsw xmm0, xmm3 /* B += Y */ \
|
__asm paddsw xmm0, xmm3 /* B += Y */ \
|
||||||
__asm paddsw xmm1, xmm3 /* G += Y */ \
|
__asm paddsw xmm1, xmm3 /* G += Y */ \
|
||||||
__asm paddsw xmm2, xmm3 /* R += Y */ \
|
__asm paddsw xmm2, xmm3 /* R += Y */ \
|
||||||
@ -1842,7 +1826,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV444
|
READYUV444
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -1884,7 +1868,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into RRGB
|
// Step 3: Weave into RRGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -1929,7 +1913,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into RRGB
|
// Step 3: Weave into RRGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -1979,7 +1963,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into RRGB
|
// Step 3: Weave into RRGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -2044,7 +2028,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -2087,7 +2071,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV411 // modifies EBX
|
READYUV411 // modifies EBX
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -2125,7 +2109,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READNV12
|
READNV12
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -2145,7 +2129,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 8 pixels.
|
// 8 pixels.
|
||||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
||||||
const uint8* uv_buf,
|
const uint8* uv_buf,
|
||||||
@ -2154,14 +2138,14 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // Y
|
mov eax, [esp + 4 + 4] // Y
|
||||||
mov esi, [esp + 4 + 8] // VU
|
mov esi, [esp + 4 + 8] // UV
|
||||||
mov edx, [esp + 4 + 12] // argb
|
mov edx, [esp + 4 + 12] // argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READNV12
|
READNV12
|
||||||
YVUTORGB
|
YUVTORGB(kYvuConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
// Step 3: Weave into ARGB
|
||||||
punpcklbw xmm0, xmm1 // BG
|
punpcklbw xmm0, xmm1 // BG
|
||||||
@ -2198,7 +2182,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into BGRA
|
// Step 3: Weave into BGRA
|
||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
@ -2238,7 +2222,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into ABGR
|
// Step 3: Weave into ABGR
|
||||||
punpcklbw xmm2, xmm1 // RG
|
punpcklbw xmm2, xmm1 // RG
|
||||||
@ -2276,7 +2260,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
|
|||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV422
|
READYUV422
|
||||||
YUVTORGB
|
YUVTORGB(kYuvConstants)
|
||||||
|
|
||||||
// Step 3: Weave into RGBA
|
// Step 3: Weave into RGBA
|
||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user