mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
I422ToRGB24Row_SSSE3 in 1 pass. Internally converts to ARGB then packs down to RGB.
BUG=116 TEST=libyuv unittest Review URL: https://webrtc-codereview.appspot.com/863013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@399 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
6b5a8efff7
commit
827de16bb1
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 398
|
||||
Version: 399
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -100,6 +100,8 @@ extern "C" {
|
||||
#define HAS_RGBATOARGBROW_SSSE3
|
||||
#define HAS_RGBATOUVROW_SSSE3
|
||||
#define HAS_RGBATOYROW_SSSE3
|
||||
#define HAS_I422TORGB24ROW_SSSE3
|
||||
#define HAS_I422TORAWROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are disabled when SSSE3 is available:
|
||||
@ -436,6 +438,19 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
uint8* rgba_buf,
|
||||
int width);
|
||||
|
||||
// RGB24/RAW are unaligned.
// The SSSE3 row functions below write their output with unaligned stores, so
// the destination pointer has no 16-byte alignment requirement.
// Each call converts one row: y_buf, u_buf and v_buf are the source planes,
// rgb_buf receives 3 bytes per pixel, and width is the pixel count.
// NOTE(review): the implementations process 8 pixels per loop iteration, so
// width is presumably expected to be a positive multiple of 8 - confirm
// against the callers / _Any_ wrappers.
|
||||
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
// RAW variant: same inputs, but the three bytes of each output pixel are
// stored in the opposite order to RGB24.
void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -528,6 +543,19 @@ void I422ToRGBARow_Any_SSSE3(const uint8* y_buf,
|
||||
uint8* rgba_buf,
|
||||
int width);
|
||||
|
||||
// RGB24/RAW are unaligned.
// "_Any_" variants of the I422 to RGB24/RAW row converters. They take the
// same arguments as the plain SSSE3 row functions: three source planes, a
// destination buffer receiving 3 bytes per pixel, and the row width in pixels.
// NOTE(review): presumably these accept widths that are not a multiple of 8
// by combining the SSSE3 row function with the C fallback - confirm against
// the YANY macro definition.
|
||||
void I422ToRGB24Row_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void I422ToRAWRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void YToARGBRow_SSE2(const uint8* y_buf,
|
||||
uint8* argb_buf,
|
||||
int width);
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 398
|
||||
#define LIBYUV_VERSION 399
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -928,12 +928,9 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
@ -982,12 +979,9 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToRAWRow = I422ToRAWRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
|
||||
I422ToRAWRow = I422ToRAWRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
|
||||
@ -1023,9 +1023,9 @@ YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
|
||||
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB24ROW_SSSE3
|
||||
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_Unaligned_SSSE3, \
|
||||
I422ToRGB24Row_C, 1)
|
||||
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_Unaligned_SSSE3, I422ToRAWRow_C, 1)
|
||||
// I422ToRGB24Row_SSSE3 is unaligned.
|
||||
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1)
|
||||
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGBAROW_SSSE3
|
||||
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
|
||||
|
||||
@ -122,6 +122,16 @@ static const uvec8 kShuffleMaskARGBToRAW = {
|
||||
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
|
||||
};
|
||||
|
||||
// Shuffle table used by I422ToRGB24Row_SSSE3 for the first 4 pixels.
// Drops every 4th source byte (indices 3, 7, 11, 15) and splits the resulting
// 12 bytes: the first 8 go to destination bytes 0..7, the last 4 go to
// destination bytes 12..15. Destination bytes 8..11 use index 128, which
// pshufb turns into zero. Keeping the last 4 bytes at the top of the register
// lets a following palignr by 12 join them with the next 4 pixels.
|
||||
static const uvec8 kShuffleMaskARGBToRGB24_0 = {
|
||||
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
|
||||
};
|
||||
|
||||
// Shuffle table used by I422ToRAWRow_SSSE3 for the first 4 pixels.
// Like kShuffleMaskARGBToRGB24_0 but with the first three bytes of every
// 4-byte source pixel reversed (2,1,0 / 6,5,4 / 10,9,8 / 14,13,12): the first
// 8 output bytes go to destination bytes 0..7, the last 4 go to destination
// bytes 12..15, and bytes 8..11 use index 128, which pshufb turns into zero.
|
||||
static const uvec8 kShuffleMaskARGBToRAW_0 = {
|
||||
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
|
||||
};
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
||||
__asm {
|
||||
@ -1654,6 +1664,100 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels per loop iteration; dest may be unaligned (stored with movq and
// movdqu).
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 pixels
// (24 bytes). Pixels are first woven into 4 bytes each (32 bytes total) and
// then packed down to 3 bytes each with pshufb.
// The loop body always runs at least once and always writes 24 bytes, so
// width is presumably expected to be a positive multiple of 8 - TODO confirm
// against callers.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb24_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
// Naked function: esi/edi are saved by hand, so the arguments sit 8 bytes
// (two pushes) plus 4 bytes (return address) above esp.
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // rgb24
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
sub edi, esi // edi = V - U offset; presumably READYUV422 reads V via [esi + edi]
|
||||
pxor xmm4, xmm4 // xmm4 = 0; presumably a zero constant for the macros below
|
||||
movdqa xmm5, kShuffleMaskARGBToRGB24_0 // first 4 pixels: 8 bytes low, 4 bytes high
|
||||
movdqa xmm6, kShuffleMaskARGBToRGB24 // next 4 pixels (table defined outside this hunk)
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
// NOTE(review): macros defined elsewhere; the annotations below assume they
// leave 8 B values in xmm0, 8 G in xmm1 and 8 R in xmm2.
READYUV422
|
||||
YUVTORGB
|
||||
|
||||
// Step 3: Weave into BGRR (4 bytes per pixel; the 4th byte is filler that the
// shuffles below discard).
|
||||
punpcklbw xmm0, xmm1 // BG
|
||||
punpcklbw xmm2, xmm2 // RR
|
||||
movdqa xmm1, xmm0 // keep a copy of BG for the high half
|
||||
punpcklwd xmm0, xmm2 // BGRR first 4 pixels
|
||||
punpckhwd xmm1, xmm2 // BGRR next 4 pixels
|
||||
pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes (bytes 8..11 zeroed).
|
||||
pshufb xmm1, xmm6 // Pack into first 12 bytes.
|
||||
palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
|
||||
movq qword ptr [edx], xmm0 // First 8 bytes
|
||||
movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB24 pixels. Unaligned store.
|
||||
lea edx, [edx + 24] // advance dest by 8 pixels * 3 bytes
|
||||
sub ecx, 8 // 8 pixels consumed
|
||||
jg convertloop // loop while pixels remain
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels per loop iteration; dest may be unaligned (stored with movq and
// movdqu).
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW pixels
// (24 bytes). Pixels are first woven into 4 bytes each (32 bytes total) and
// then packed down to 3 bytes each, in reversed byte order, with pshufb.
// The loop body always runs at least once and always writes 24 bytes, so
// width is presumably expected to be a positive multiple of 8 - TODO confirm
// against callers.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* raw_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
// Naked function: esi/edi are saved by hand, so the arguments sit 8 bytes
// (two pushes) plus 4 bytes (return address) above esp.
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // raw
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
sub edi, esi // edi = V - U offset; presumably READYUV422 reads V via [esi + edi]
|
||||
pxor xmm4, xmm4 // xmm4 = 0; presumably a zero constant for the macros below
|
||||
movdqa xmm5, kShuffleMaskARGBToRAW_0 // first 4 pixels: 8 bytes low, 4 bytes high
|
||||
movdqa xmm6, kShuffleMaskARGBToRAW // next 4 pixels: 12 bytes low, rest zeroed
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
// NOTE(review): macros defined elsewhere; the annotations below assume they
// leave 8 B values in xmm0, 8 G in xmm1 and 8 R in xmm2.
READYUV422
|
||||
YUVTORGB
|
||||
|
||||
// Step 3: Weave into BGRR (4 bytes per pixel; the 4th byte is filler that the
// shuffles below discard).
|
||||
punpcklbw xmm0, xmm1 // BG
|
||||
punpcklbw xmm2, xmm2 // RR
|
||||
movdqa xmm1, xmm0 // keep a copy of BG for the high half
|
||||
punpcklwd xmm0, xmm2 // BGRR first 4 pixels
|
||||
punpckhwd xmm1, xmm2 // BGRR next 4 pixels
|
||||
pshufb xmm0, xmm5 // Reverse to RGB order; pack into first 8 and last 4 bytes.
|
||||
pshufb xmm1, xmm6 // Reverse to RGB order; pack into first 12 bytes.
|
||||
palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
|
||||
movq qword ptr [edx], xmm0 // First 8 bytes
|
||||
movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RAW pixels. Unaligned store.
|
||||
lea edx, [edx + 24] // advance dest by 8 pixels * 3 bytes
|
||||
sub ecx, 8 // 8 pixels consumed
|
||||
jg convertloop // loop while pixels remain
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels, dest aligned 16.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked) __declspec(align(16))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user