mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
I422ToRGB24Row_SSSE3 in 1 pass. Internally converts to ARGB then packs down to RGB.
BUG=116 TEST=libyuv unittest Review URL: https://webrtc-codereview.appspot.com/863013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@399 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
6b5a8efff7
commit
827de16bb1
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 398
|
Version: 399
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -100,6 +100,8 @@ extern "C" {
|
|||||||
#define HAS_RGBATOARGBROW_SSSE3
|
#define HAS_RGBATOARGBROW_SSSE3
|
||||||
#define HAS_RGBATOUVROW_SSSE3
|
#define HAS_RGBATOUVROW_SSSE3
|
||||||
#define HAS_RGBATOYROW_SSSE3
|
#define HAS_RGBATOYROW_SSSE3
|
||||||
|
#define HAS_I422TORGB24ROW_SSSE3
|
||||||
|
#define HAS_I422TORAWROW_SSSE3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are disabled when SSSE3 is available:
|
// The following are disabled when SSSE3 is available:
|
||||||
@ -436,6 +438,19 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
|
|||||||
uint8* rgba_buf,
|
uint8* rgba_buf,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
|
// RGB24/RAW are unaligned.
|
||||||
|
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* rgb_buf,
|
||||||
|
int width);
|
||||||
|
|
||||||
|
void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* rgb_buf,
|
||||||
|
int width);
|
||||||
|
|
||||||
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
|
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
const uint8* v_buf,
|
const uint8* v_buf,
|
||||||
@ -528,6 +543,19 @@ void I422ToRGBARow_Any_SSSE3(const uint8* y_buf,
|
|||||||
uint8* rgba_buf,
|
uint8* rgba_buf,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
|
// RGB24/RAW are unaligned.
|
||||||
|
void I422ToRGB24Row_Any_SSSE3(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* rgb_buf,
|
||||||
|
int width);
|
||||||
|
|
||||||
|
void I422ToRAWRow_Any_SSSE3(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* rgb_buf,
|
||||||
|
int width);
|
||||||
|
|
||||||
void YToARGBRow_SSE2(const uint8* y_buf,
|
void YToARGBRow_SSE2(const uint8* y_buf,
|
||||||
uint8* argb_buf,
|
uint8* argb_buf,
|
||||||
int width);
|
int width);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 398
|
#define LIBYUV_VERSION 399
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -928,10 +928,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
|||||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||||
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
|
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
|
||||||
if (IS_ALIGNED(width, 8)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
I422ToRGB24Row = I422ToRGB24Row_Unaligned_SSSE3;
|
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
|
||||||
if (IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
|
|
||||||
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -982,10 +979,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
|
|||||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||||
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
|
I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
|
||||||
if (IS_ALIGNED(width, 8)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
I422ToRAWRow = I422ToRAWRow_Unaligned_SSSE3;
|
I422ToRAWRow = I422ToRAWRow_SSSE3;
|
||||||
if (IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
|
|
||||||
I422ToRAWRow = I422ToRAWRow_SSSE3;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -1023,9 +1023,9 @@ YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
|
|||||||
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
|
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_I422TORGB24ROW_SSSE3
|
#ifdef HAS_I422TORGB24ROW_SSSE3
|
||||||
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_Unaligned_SSSE3, \
|
// I422ToRGB24Row_SSSE3 is unaligned.
|
||||||
I422ToRGB24Row_C, 1)
|
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1)
|
||||||
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_Unaligned_SSSE3, I422ToRAWRow_C, 1)
|
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_I422TORGBAROW_SSSE3
|
#ifdef HAS_I422TORGBAROW_SSSE3
|
||||||
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
|
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
|
||||||
|
|||||||
@ -122,6 +122,16 @@ static const uvec8 kShuffleMaskARGBToRAW = {
|
|||||||
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
|
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Shuffle table used by I422ToRGB24Row_SSSE3 on the first 4 of 8 pixels.
// Keeps bytes 0-2 of each 4-byte pixel (12 bytes total) and splits them:
// the first 8 output bytes go to lanes 0-7 and the next 4 to lanes 12-15.
// Lanes 8-11 use index 128 (high bit set), which pshufb writes as zero.
|
||||||
|
static const uvec8 kShuffleMaskARGBToRGB24_0 = {
|
||||||
|
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
|
||||||
|
};
|
||||||
|
|
||||||
|
// Shuffle table used by I422ToRAWRow_SSSE3 on the first 4 of 8 pixels.
// Takes bytes 2,1,0 of each 4-byte pixel (reversed order, 12 bytes total) and
// splits them: the first 8 output bytes go to lanes 0-7 and the next 4 to
// lanes 12-15. Lanes 8-11 use index 128 (high bit set), which pshufb zeroes.
|
||||||
|
static const uvec8 kShuffleMaskARGBToRAW_0 = {
|
||||||
|
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
|
||||||
|
};
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
@ -1654,6 +1664,100 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts a row of I422 to RGB24 in one pass, 8 pixels per loop iteration.
// The destination does not need to be aligned: it is written with movq and
// movdqu. Every iteration stores a full 24 bytes and the loop body always runs
// at least once, so callers are expected to pass a positive multiple of 8.
|
||||||
|
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 BGRR pixels
// (32 bytes), which are then packed down to 8 RGB24 pixels (24 bytes).
|
||||||
|
__declspec(naked) __declspec(align(16))
|
||||||
|
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* rgb24_buf,
|
||||||
|
int width) {
|
||||||
|
__asm {
|
||||||
|
push esi  // Saved here, restored before ret.
|
||||||
|
push edi  // Saved here, restored before ret.
|
||||||
|
// Arguments are read relative to esp; the "+ 8" skips the two pushes above.
mov eax, [esp + 8 + 4] // Y
|
||||||
|
mov esi, [esp + 8 + 8] // U
|
||||||
|
mov edi, [esp + 8 + 12] // V
|
||||||
|
mov edx, [esp + 8 + 16] // rgb24
|
||||||
|
mov ecx, [esp + 8 + 20] // width
|
||||||
|
sub edi, esi  // edi = V - U offset. Presumably READYUV422 reads V via [esi + edi]; macro not shown here.
|
||||||
|
pxor xmm4, xmm4  // xmm4 = 0. Presumably consumed by the READYUV422/YUVTORGB macros; confirm there.
|
||||||
|
movdqa xmm5, kShuffleMaskARGBToRGB24_0  // Mask for first 4 pixels (8 + 4 split).
|
||||||
|
movdqa xmm6, kShuffleMaskARGBToRGB24  // Mask for next 4 pixels.

||||||
|
align 16
|
||||||
|
convertloop:
|
||||||
|
READYUV422
|
||||||
|
YUVTORGB

||||||
|
// Step 3: Weave into BGRR (R is duplicated into the 4th byte of each pixel).
|
||||||
|
punpcklbw xmm0, xmm1 // BG
|
||||||
|
punpcklbw xmm2, xmm2 // RR
|
||||||
|
movdqa xmm1, xmm0  // Copy BG so both halves can be interleaved with RR.
|
||||||
|
punpcklwd xmm0, xmm2 // BGRR first 4 pixels
|
||||||
|
punpckhwd xmm1, xmm2 // BGRR next 4 pixels
|
||||||
|
pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes.
|
||||||
|
pshufb xmm1, xmm6 // Pack into first 12 bytes.
|
||||||
|
palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
|
||||||
|
movq qword ptr [edx], xmm0 // First 8 bytes
|
||||||
|
movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels.
|
||||||
|
lea edx, [edx + 24]  // Advance dest by 8 pixels * 3 bytes.
|
||||||
|
sub ecx, 8  // 8 pixels consumed.
|
||||||
|
jg convertloop  // Loop while pixels remain (ecx > 0).

||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a row of I422 to RAW in one pass, 8 pixels per loop iteration.
// Same as I422ToRGB24Row_SSSE3 except the shuffle masks reverse the first
// three bytes of each pixel. The destination does not need to be aligned: it
// is written with movq and movdqu. Every iteration stores a full 24 bytes and
// the loop body always runs at least once, so callers are expected to pass a
// positive multiple of 8.
|
||||||
|
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 BGRR pixels
// (32 bytes), which are then packed down to 8 RAW pixels (24 bytes).
|
||||||
|
__declspec(naked) __declspec(align(16))
|
||||||
|
void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||||
|
const uint8* u_buf,
|
||||||
|
const uint8* v_buf,
|
||||||
|
uint8* raw_buf,
|
||||||
|
int width) {
|
||||||
|
__asm {
|
||||||
|
push esi  // Saved here, restored before ret.
|
||||||
|
push edi  // Saved here, restored before ret.
|
||||||
|
// Arguments are read relative to esp; the "+ 8" skips the two pushes above.
mov eax, [esp + 8 + 4] // Y
|
||||||
|
mov esi, [esp + 8 + 8] // U
|
||||||
|
mov edi, [esp + 8 + 12] // V
|
||||||
|
mov edx, [esp + 8 + 16] // raw
|
||||||
|
mov ecx, [esp + 8 + 20] // width
|
||||||
|
sub edi, esi  // edi = V - U offset. Presumably READYUV422 reads V via [esi + edi]; macro not shown here.
|
||||||
|
pxor xmm4, xmm4  // xmm4 = 0. Presumably consumed by the READYUV422/YUVTORGB macros; confirm there.
|
||||||
|
movdqa xmm5, kShuffleMaskARGBToRAW_0  // Mask for first 4 pixels (8 + 4 split).
|
||||||
|
movdqa xmm6, kShuffleMaskARGBToRAW  // Mask for next 4 pixels.

||||||
|
align 16
|
||||||
|
convertloop:
|
||||||
|
READYUV422
|
||||||
|
YUVTORGB

||||||
|
// Step 3: Weave into BGRR (R is duplicated into the 4th byte of each pixel).
|
||||||
|
punpcklbw xmm0, xmm1 // BG
|
||||||
|
punpcklbw xmm2, xmm2 // RR
|
||||||
|
movdqa xmm1, xmm0  // Copy BG so both halves can be interleaved with RR.
|
||||||
|
punpcklwd xmm0, xmm2 // BGRR first 4 pixels
|
||||||
|
punpckhwd xmm1, xmm2 // BGRR next 4 pixels
|
||||||
|
pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes.
|
||||||
|
pshufb xmm1, xmm6 // Pack into first 12 bytes.
|
||||||
|
palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
|
||||||
|
movq qword ptr [edx], xmm0 // First 8 bytes
|
||||||
|
movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RAW pixels.
|
||||||
|
lea edx, [edx + 24]  // Advance dest by 8 pixels * 3 bytes.
|
||||||
|
sub ecx, 8  // 8 pixels consumed.
|
||||||
|
jg convertloop  // Loop while pixels remain (ecx > 0).

||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 8 pixels, dest aligned 16.
|
// 8 pixels, dest aligned 16.
|
||||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user