Add a parameterized macro for YUV to RGB conversion on Windows. Passing the constants structure as a macro argument allows multiple color matrix structures to be supported in the future.

BUG=393
TESTED=local build
R=brucedawson@google.com, harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/38079004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1275 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2015-02-10 23:03:37 +00:00
parent 97a3850ea4
commit 3bb829a44f
2 changed files with 85 additions and 101 deletions

View File

@ -1499,23 +1499,23 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
"punpcklwd %%xmm0,%%xmm0 \n" "punpcklwd %%xmm0,%%xmm0 \n"
// Convert 8 pixels: 8 UV and 8 Y // Convert 8 pixels: 8 UV and 8 Y
#define YUVTORGB(kYuvConstants) \ #define YUVTORGB(YuvConstants) \
"movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm3 \n" \ "movdqa %%xmm0,%%xmm3 \n" \
"movdqa " MEMACCESS2(96, [kYuvConstants]) ",%%xmm0 \n" \ "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
"pmaddubsw " MEMACCESS([kYuvConstants]) ",%%xmm1 \n" \ "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
"psubw %%xmm1,%%xmm0 \n" \ "psubw %%xmm1,%%xmm0 \n" \
"movdqa " MEMACCESS2(128, [kYuvConstants]) ",%%xmm1 \n" \ "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
"pmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2 \n" \ "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
"psubw %%xmm2,%%xmm1 \n" \ "psubw %%xmm2,%%xmm1 \n" \
"movdqa " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \ "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
"pmaddubsw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm3 \n" \ "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
"psubw %%xmm3,%%xmm2 \n" \ "psubw %%xmm3,%%xmm2 \n" \
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
"punpcklbw %%xmm3,%%xmm3 \n" \ "punpcklbw %%xmm3,%%xmm3 \n" \
"pmulhuw " MEMACCESS2(192, [kYuvConstants]) ",%%xmm3 \n" \ "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
"paddsw %%xmm3,%%xmm0 \n" \ "paddsw %%xmm3,%%xmm0 \n" \
"paddsw %%xmm3,%%xmm1 \n" \ "paddsw %%xmm3,%%xmm1 \n" \
"paddsw %%xmm3,%%xmm2 \n" \ "paddsw %%xmm3,%%xmm2 \n" \
@ -1887,21 +1887,21 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n"
// Convert 16 pixels: 16 UV and 16 Y. // Convert 16 pixels: 16 UV and 16 Y.
#define YUVTORGB_AVX2(kYuvConstants) \ #define YUVTORGB_AVX2(YuvConstants) \
"vpmaddubsw " MEMACCESS2(64, [kYuvConstants]) ",%%ymm0,%%ymm2 \n" \ "vpmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%ymm0,%%ymm2 \n" \
"vpmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%ymm0,%%ymm1 \n" \ "vpmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%ymm0,%%ymm1 \n" \
"vpmaddubsw " MEMACCESS([kYuvConstants]) ",%%ymm0,%%ymm0 \n" \ "vpmaddubsw " MEMACCESS([YuvConstants]) ",%%ymm0,%%ymm0 \n" \
"vmovdqu " MEMACCESS2(160, [kYuvConstants]) ",%%ymm3 \n" \ "vmovdqu " MEMACCESS2(160, [YuvConstants]) ",%%ymm3 \n" \
"vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
"vmovdqu " MEMACCESS2(128, [kYuvConstants]) ",%%ymm2 \n" \ "vmovdqu " MEMACCESS2(128, [YuvConstants]) ",%%ymm2 \n" \
"vpsubw %%ymm1,%%ymm2,%%ymm1 \n" \ "vpsubw %%ymm1,%%ymm2,%%ymm1 \n" \
"vmovdqu " MEMACCESS2(96, [kYuvConstants]) ",%%ymm1 \n" \ "vmovdqu " MEMACCESS2(96, [YuvConstants]) ",%%ymm1 \n" \
"vpsubw %%ymm0,%%ymm1,%%ymm0 \n" \ "vpsubw %%ymm0,%%ymm1,%%ymm0 \n" \
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
"vpermq $0xd8,%%ymm3,%%ymm3 \n" \ "vpermq $0xd8,%%ymm3,%%ymm3 \n" \
"vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \ "vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \
"vpmulhuw " MEMACCESS2(192, [kYuvConstants]) ",%%ymm3,%%ymm3 \n" \ "vpmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%ymm3,%%ymm3 \n" \
"vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \ "vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \
"vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \ "vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \
"vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \ "vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \

View File

@ -1472,30 +1472,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
#endif // HAS_ARGBTOYROW_SSSE3 #endif // HAS_ARGBTOYROW_SSSE3
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
static const lvec8 kUVToB_AVX = {
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, struct YuvConstants {
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 lvec8 kUVToB; // 0
lvec8 kUVToG; // 32
lvec8 kUVToR; // 64
lvec16 kUVBiasB; // 96
lvec16 kUVBiasG; // 128
lvec16 kUVBiasR; // 160
lvec16 kYToRgb; // 192
}; };
static const lvec8 kUVToR_AVX = {
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, // BT601 constants for YUV to RGB.
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
{ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
}; };
static const lvec8 kUVToG_AVX = {
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, // BT601 constants for NV21 where chroma plane is VU instead of UV.
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
}; { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
static const lvec16 kYToRgb_AVX = { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
}; VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
static const lvec16 kUVBiasB_AVX = { { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
}; { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
static const lvec16 kUVBiasG_AVX = { { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
}; { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
static const lvec16 kUVBiasR_AVX = {
BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
}; };
#endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2) #endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
// Read 8 UV from 422, upsample to 16 UV. // Read 8 UV from 422, upsample to 16 UV.
@ -1509,23 +1524,23 @@ static const lvec16 kUVBiasR_AVX = {
} }
// Convert 16 pixels: 16 UV and 16 Y. // Convert 16 pixels: 16 UV and 16 Y.
#define YUVTORGB_AVX2 __asm { \ #define YUVTORGB_AVX2(YuvConstants) __asm { \
/* Step 1: Find 8 UV contributions to 16 R,G,B values */ \ /* Step 1: Find 8 UV contributions to 16 R,G,B values */ \
__asm vpmaddubsw ymm2, ymm0, kUVToR_AVX /* scale R UV */ \ __asm vpmaddubsw ymm2, ymm0, YuvConstants.kUVToR /* scale R UV */ \
__asm vpmaddubsw ymm1, ymm0, kUVToG_AVX /* scale G UV */ \ __asm vpmaddubsw ymm1, ymm0, YuvConstants.kUVToG /* scale G UV */ \
__asm vpmaddubsw ymm0, ymm0, kUVToB_AVX /* scale B UV */ \ __asm vpmaddubsw ymm0, ymm0, YuvConstants.kUVToB /* scale B UV */ \
__asm vmovdqu ymm3, kUVBiasR_AVX \ __asm vmovdqu ymm3, YuvConstants.kUVBiasR \
__asm vpsubw ymm2, ymm3, ymm2 \ __asm vpsubw ymm2, ymm3, ymm2 \
__asm vmovdqu ymm3, kUVBiasG_AVX \ __asm vmovdqu ymm3, YuvConstants.kUVBiasG \
__asm vpsubw ymm1, ymm3, ymm1 \ __asm vpsubw ymm1, ymm3, ymm1 \
__asm vmovdqu ymm3, kUVBiasB_AVX \ __asm vmovdqu ymm3, YuvConstants.kUVBiasB \
__asm vpsubw ymm0, ymm3, ymm0 \ __asm vpsubw ymm0, ymm3, ymm0 \
/* Step 2: Find Y contribution to 16 R,G,B values */ \ /* Step 2: Find Y contribution to 16 R,G,B values */ \
__asm vmovdqu xmm3, [eax] /* NOLINT */ \ __asm vmovdqu xmm3, [eax] /* NOLINT */ \
__asm lea eax, [eax + 16] \ __asm lea eax, [eax + 16] \
__asm vpermq ymm3, ymm3, 0xd8 \ __asm vpermq ymm3, ymm3, 0xd8 \
__asm vpunpcklbw ymm3, ymm3, ymm3 \ __asm vpunpcklbw ymm3, ymm3, ymm3 \
__asm vpmulhuw ymm3, ymm3, kYToRgb_AVX \ __asm vpmulhuw ymm3, ymm3, YuvConstants.kYToRgb \
__asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \ __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
__asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \ __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
__asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \ __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
@ -1559,7 +1574,7 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
vpunpcklbw ymm0, ymm0, ymm1 // BG vpunpcklbw ymm0, ymm0, ymm1 // BG
@ -1605,7 +1620,7 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into BGRA // Step 3: Weave into BGRA
vpunpcklbw ymm1, ymm1, ymm0 // GB vpunpcklbw ymm1, ymm1, ymm0 // GB
@ -1651,7 +1666,7 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into RGBA // Step 3: Weave into RGBA
vpunpcklbw ymm1, ymm1, ymm2 // GR vpunpcklbw ymm1, ymm1, ymm2 // GR
@ -1697,7 +1712,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into ABGR // Step 3: Weave into ABGR
vpunpcklbw ymm1, ymm2, ymm1 // RG vpunpcklbw ymm1, ymm2, ymm1 // RG
@ -1760,56 +1775,25 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
} }
// Convert 8 pixels: 8 UV and 8 Y. // Convert 8 pixels: 8 UV and 8 Y.
#define YUVTORGB __asm { \ #define YUVTORGB(YuvConstants) __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \ __asm movdqa xmm1, xmm0 \
__asm movdqa xmm2, xmm0 \ __asm movdqa xmm2, xmm0 \
__asm movdqa xmm3, xmm0 \ __asm movdqa xmm3, xmm0 \
__asm movdqa xmm0, kUVBiasB /* unbias back to signed */ \ __asm movdqa xmm0, YuvConstants.kUVBiasB /* unbias back to signed */ \
__asm pmaddubsw xmm1, kUVToB /* scale B UV */ \ __asm pmaddubsw xmm1, YuvConstants.kUVToB /* scale B UV */ \
__asm psubw xmm0, xmm1 \ __asm psubw xmm0, xmm1 \
__asm movdqa xmm1, kUVBiasG \ __asm movdqa xmm1, YuvConstants.kUVBiasG \
__asm pmaddubsw xmm2, kUVToG /* scale G UV */ \ __asm pmaddubsw xmm2, YuvConstants.kUVToG /* scale G UV */ \
__asm psubw xmm1, xmm2 \ __asm psubw xmm1, xmm2 \
__asm movdqa xmm2, kUVBiasR \ __asm movdqa xmm2, YuvConstants.kUVBiasR \
__asm pmaddubsw xmm3, kUVToR /* scale R UV */ \ __asm pmaddubsw xmm3, YuvConstants.kUVToR /* scale R UV */ \
__asm psubw xmm2, xmm3 \ __asm psubw xmm2, xmm3 \
/* Step 2: Find Y contribution to 8 R,G,B values */ \ /* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \ __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \ __asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm3 \ __asm punpcklbw xmm3, xmm3 \
__asm pmulhuw xmm3, kYToRgb \ __asm pmulhuw xmm3, YuvConstants.kYToRgb \
__asm paddsw xmm0, xmm3 /* B += Y */ \
__asm paddsw xmm1, xmm3 /* G += Y */ \
__asm paddsw xmm2, xmm3 /* R += Y */ \
__asm psraw xmm0, 6 \
__asm psraw xmm1, 6 \
__asm psraw xmm2, 6 \
__asm packuswb xmm0, xmm0 /* B */ \
__asm packuswb xmm1, xmm1 /* G */ \
__asm packuswb xmm2, xmm2 /* R */ \
}
// Convert 8 pixels: 8 VU and 8 Y.
#define YVUTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \
__asm movdqa xmm2, xmm0 \
__asm movdqa xmm3, xmm0 \
__asm movdqa xmm0, kUVBiasB /* unbias back to signed */ \
__asm pmaddubsw xmm1, kVUToB /* scale B UV */ \
__asm psubw xmm0, xmm1 \
__asm movdqa xmm1, kUVBiasG \
__asm pmaddubsw xmm2, kVUToG /* scale G UV */ \
__asm psubw xmm1, xmm2 \
__asm movdqa xmm2, kUVBiasR \
__asm pmaddubsw xmm3, kVUToR /* scale R UV */ \
__asm psubw xmm2, xmm3 \
/* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm3 \
__asm pmulhuw xmm3, kYToRgb \
__asm paddsw xmm0, xmm3 /* B += Y */ \ __asm paddsw xmm0, xmm3 /* B += Y */ \
__asm paddsw xmm1, xmm3 /* G += Y */ \ __asm paddsw xmm1, xmm3 /* G += Y */ \
__asm paddsw xmm2, xmm3 /* R += Y */ \ __asm paddsw xmm2, xmm3 /* R += Y */ \
@ -1842,7 +1826,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV444 READYUV444
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -1884,7 +1868,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RRGB // Step 3: Weave into RRGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -1929,7 +1913,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RRGB // Step 3: Weave into RRGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -1979,7 +1963,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RRGB // Step 3: Weave into RRGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -2044,7 +2028,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -2087,7 +2071,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV411 // modifies EBX READYUV411 // modifies EBX
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -2125,7 +2109,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READNV12 READNV12
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -2145,7 +2129,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels. // 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void NV21ToARGBRow_SSSE3(const uint8* y_buf, void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
@ -2154,14 +2138,14 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
mov eax, [esp + 4 + 4] // Y mov eax, [esp + 4 + 4] // Y
mov esi, [esp + 4 + 8] // VU mov esi, [esp + 4 + 8] // UV
mov edx, [esp + 4 + 12] // argb mov edx, [esp + 4 + 12] // argb
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READNV12 READNV12
YVUTORGB YUVTORGB(kYvuConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
@ -2198,7 +2182,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into BGRA // Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
@ -2238,7 +2222,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm2, xmm1 // RG punpcklbw xmm2, xmm1 // RG
@ -2276,7 +2260,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RGBA // Step 3: Weave into RGBA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha