mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
port I420ToYUY2 code to support I420ToYUY2 I422ToYUY2 and I420ToV210
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/388011 git-svn-id: http://libyuv.googlecode.com/svn/trunk@173 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
798197fc14
commit
032b5f990f
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 172
|
||||
Version: 173
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -69,6 +69,19 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
int I422ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
int I420ToV210(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
|
||||
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 172
|
||||
#define LIBYUV_VERSION 173
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
|
||||
@ -10,12 +10,6 @@
|
||||
|
||||
#include "libyuv/convert.h"
|
||||
|
||||
//#define SCALEOPT //Currently for windows only. June 2010
|
||||
|
||||
#ifdef SCALEOPT
|
||||
#include <emmintrin.h> // Not currently used
|
||||
#endif
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/format_conversion.h"
|
||||
@ -32,6 +26,9 @@ extern "C" {
|
||||
// YUY2 - Macro-pixel = 2 image pixels
|
||||
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
|
||||
|
||||
// UYVY - Macro-pixel = 2 image pixels
|
||||
// U0Y0V0Y1
|
||||
|
||||
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
|
||||
#define HAS_I42XTOYUY2ROW_SSE2
|
||||
__declspec(naked)
|
||||
@ -50,12 +47,12 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
|
||||
sub edx, esi
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // Y
|
||||
lea eax, [eax + 16]
|
||||
movq xmm2, qword ptr [esi] // U
|
||||
movq xmm3, qword ptr [esi + edx] // V
|
||||
lea esi, [esi + 8]
|
||||
punpcklbw xmm2, xmm3 // UV
|
||||
movdqa xmm0, [eax] // Y
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, xmm2 // YUYV
|
||||
punpckhbw xmm1, xmm2
|
||||
@ -70,6 +67,44 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#define HAS_I42XTOUYVYROW_SSE2
|
||||
__declspec(naked)
|
||||
static void I42xToUYVYRow_SSE2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_frame, int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // src_y
|
||||
mov esi, [esp + 8 + 8] // src_u
|
||||
mov edx, [esp + 8 + 12] // src_v
|
||||
mov edi, [esp + 8 + 16] // dst_frame
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
sub edx, esi
|
||||
|
||||
convertloop:
|
||||
movq xmm2, qword ptr [esi] // U
|
||||
movq xmm3, qword ptr [esi + edx] // V
|
||||
lea esi, [esi + 8]
|
||||
punpcklbw xmm2, xmm3 // UV
|
||||
movdqa xmm0, [eax] // Y
|
||||
movdqa xmm1, xmm2
|
||||
lea eax, [eax + 16]
|
||||
punpcklbw xmm1, xmm0 // UYVY
|
||||
punpckhbw xmm2, xmm0
|
||||
movdqa [edi], xmm1
|
||||
movdqa [edi + 16], xmm2
|
||||
lea edi, [edi + 32]
|
||||
sub ecx, 16
|
||||
ja convertloop
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
|
||||
#define HAS_I42XTOYUY2ROW_SSE2
|
||||
static void I42xToYUY2Row_SSE2(const uint8* src_y,
|
||||
@ -77,33 +112,68 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_frame, int width) {
|
||||
asm volatile (
|
||||
"sub %1,%2 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movq (%1),%%xmm2 \n"
|
||||
"movq (%1,%2,1),%%xmm3 \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm2,%%xmm0 \n"
|
||||
"punpckhbw %%xmm2,%%xmm1 \n"
|
||||
"movdqa %%xmm0,(%3) \n"
|
||||
"movdqa %%xmm1,0x10(%3) \n"
|
||||
"lea 0x20(%3),%3 \n"
|
||||
"sub $0x10,%4 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_frame), // %3
|
||||
"+rm"(width) // %4
|
||||
:
|
||||
: "memory", "cc"
|
||||
"sub %1,%2 \n"
|
||||
"1: \n"
|
||||
"movq (%1),%%xmm2 \n"
|
||||
"movq (%1,%2,1),%%xmm3 \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm2,%%xmm0 \n"
|
||||
"punpckhbw %%xmm2,%%xmm1 \n"
|
||||
"movdqa %%xmm0,(%3) \n"
|
||||
"movdqa %%xmm1,0x10(%3) \n"
|
||||
"lea 0x20(%3),%3 \n"
|
||||
"sub $0x10,%4 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_frame), // %3
|
||||
"+rm"(width) // %4
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3"
|
||||
#endif
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
#define HAS_I42XTOUYVYROW_SSE2
|
||||
static void I42xToUYVYRow_SSE2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_frame, int width) {
|
||||
asm volatile (
|
||||
"sub %1,%2 \n"
|
||||
"1: \n"
|
||||
"movq (%1),%%xmm2 \n"
|
||||
"movq (%1,%2,1),%%xmm3 \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa %%xmm2,%%xmm1 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"punpcklbw %%xmm0,%%xmm1 \n"
|
||||
"punpckhbw %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,(%3) \n"
|
||||
"movdqa %%xmm2,0x10(%3) \n"
|
||||
"lea 0x20(%3),%3 \n"
|
||||
"sub $0x10,%4 \n"
|
||||
"ja 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_frame), // %3
|
||||
"+rm"(width) // %4
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -127,6 +197,104 @@ void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
|
||||
}
|
||||
}
|
||||
|
||||
void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
|
||||
uint8* dst_frame, int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
dst_frame[0] = src_u[0];
|
||||
dst_frame[1] = src_y[0];
|
||||
dst_frame[2] = src_v[0];
|
||||
dst_frame[3] = src_y[1];
|
||||
dst_frame += 4;
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
dst_frame[0] = src_u[0];
|
||||
dst_frame[1] = src_y[0];
|
||||
dst_frame[2] = src_v[0];
|
||||
dst_frame[3] = src_y[0]; // duplicate last y
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// gcc provided macros
|
||||
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define LIBYUV_LITTLE_ENDIAN
|
||||
#endif
|
||||
// Visual C for x86 defines these
|
||||
#elif defined(_M_X64) || defined(_M_IX86)
|
||||
#define LIBYUV_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#ifdef LIBYUV_LITTLE_ENDIAN
|
||||
#define READWORD(p) (*((uint32*) (p)))
|
||||
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
|
||||
#else
|
||||
uint32 READWORD(const uint8* p) {
|
||||
return (uint32) p[0] |
|
||||
((uint32) (p[1]) << 8) |
|
||||
((uint32) (p[2]) << 16) |
|
||||
((uint32) (p[3]) << 24);
|
||||
}
|
||||
void WRITEWORD(uint8* p, uint32 v) {
|
||||
p[0] = (uint8)(v & 255);
|
||||
p[1] = (uint8)((v >> 8) & 255);
|
||||
p[2] = (uint8)((v >> 16) & 255);
|
||||
p[3] = (uint8)((v >> 24) & 255);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Must be multiple of 6 pixels. Will over convert to handle remainder.
|
||||
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
|
||||
void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
|
||||
for (int x = 0; x < width; x += 6) {
|
||||
uint32 w = READWORD(src_v210 + 0);
|
||||
dst_uyvy[0] = (w >> 2) & 0xff;
|
||||
dst_uyvy[1] = (w >> 12) & 0xff;
|
||||
dst_uyvy[2] = (w >> 22) & 0xff;
|
||||
|
||||
w = READWORD(src_v210 + 4);
|
||||
dst_uyvy[3] = (w >> 2) & 0xff;
|
||||
dst_uyvy[4] = (w >> 12) & 0xff;
|
||||
dst_uyvy[5] = (w >> 22) & 0xff;
|
||||
|
||||
w = READWORD(src_v210 + 8);
|
||||
dst_uyvy[6] = (w >> 2) & 0xff;
|
||||
dst_uyvy[7] = (w >> 12) & 0xff;
|
||||
dst_uyvy[8] = (w >> 22) & 0xff;
|
||||
|
||||
w = READWORD(src_v210 + 12);
|
||||
dst_uyvy[9] = (w >> 2) & 0xff;
|
||||
dst_uyvy[10] = (w >> 12) & 0xff;
|
||||
dst_uyvy[11] = (w >> 22) & 0xff;
|
||||
|
||||
src_v210 += 16;
|
||||
dst_uyvy += 12;
|
||||
}
|
||||
}
|
||||
|
||||
#define EIGHTTOTEN(x) (x << 2 | x >> 6)
|
||||
void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) {
|
||||
for (int x = 0; x < width; x += 6) {
|
||||
WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) |
|
||||
(EIGHTTOTEN(src_uyvy[1]) << 10) |
|
||||
(EIGHTTOTEN(src_uyvy[2]) << 20));
|
||||
WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) |
|
||||
(EIGHTTOTEN(src_uyvy[4]) << 10) |
|
||||
(EIGHTTOTEN(src_uyvy[5]) << 20));
|
||||
WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) |
|
||||
(EIGHTTOTEN(src_uyvy[7]) << 10) |
|
||||
(EIGHTTOTEN(src_uyvy[8]) << 20));
|
||||
WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) |
|
||||
(EIGHTTOTEN(src_uyvy[10]) << 10) |
|
||||
(EIGHTTOTEN(src_uyvy[11]) << 20));
|
||||
src_uyvy += 12;
|
||||
dst_v210 += 16;
|
||||
}
|
||||
}
|
||||
|
||||
int I422ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
@ -143,17 +311,16 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_frame, int width);
|
||||
I42xToYUY2Row = I42xToYUY2Row_C;
|
||||
#if defined(HAS_I42XTOYUY2ROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
|
||||
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
|
||||
I42xToYUY2Row = I42xToYUY2Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
I42xToYUY2Row = I42xToYUY2Row_C;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
I42xToYUY2Row(src_y, src_u, src_y, dst_frame, width);
|
||||
src_y += src_stride_y;
|
||||
@ -180,17 +347,16 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_frame, int width);
|
||||
I42xToYUY2Row = I42xToYUY2Row_C;
|
||||
#if defined(HAS_I42XTOYUY2ROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
|
||||
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
|
||||
I42xToYUY2Row = I42xToYUY2Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
I42xToYUY2Row = I42xToYUY2Row_C;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
|
||||
I42xToYUY2Row(src_y + src_stride_y, src_u, src_v,
|
||||
@ -206,6 +372,42 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int I422ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height) {
|
||||
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
|
||||
dst_stride_frame = -dst_stride_frame;
|
||||
}
|
||||
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_frame, int width);
|
||||
I42xToUYVYRow = I42xToUYVYRow_C;
|
||||
#if defined(HAS_I42XTOUYVYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
|
||||
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
|
||||
I42xToUYVYRow = I42xToUYVYRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
I42xToUYVYRow(src_y, src_u, src_y, dst_frame, width);
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
dst_frame += dst_stride_frame;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
@ -214,107 +416,85 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
const uint8* y1 = src_y;
|
||||
const uint8* y2 = y1 + src_stride_y;
|
||||
const uint8* u = src_u;
|
||||
const uint8* v = src_v;
|
||||
|
||||
uint8* out1 = dst_frame;
|
||||
uint8* out2 = dst_frame + dst_stride_frame;
|
||||
|
||||
// Macro-pixel = 2 image pixels
|
||||
// U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
|
||||
|
||||
#ifndef SCALEOPT
|
||||
for (; i < ((height + 1) >> 1); i++) {
|
||||
for (int j = 0; j < ((width + 1) >> 1); j++) {
|
||||
out1[0] = *u;
|
||||
out1[1] = y1[0];
|
||||
out1[2] = *v;
|
||||
out1[3] = y1[1];
|
||||
|
||||
out2[0] = *u;
|
||||
out2[1] = y2[0];
|
||||
out2[2] = *v;
|
||||
out2[3] = y2[1];
|
||||
out1 += 4;
|
||||
out2 += 4;
|
||||
u++;
|
||||
v++;
|
||||
y1 += 2;
|
||||
y2 += 2;
|
||||
}
|
||||
y1 += 2 * src_stride_y - width;
|
||||
y2 += 2 * src_stride_y - width;
|
||||
u += src_stride_u - ((width + 1) >> 1);
|
||||
v += src_stride_v - ((width + 1) >> 1);
|
||||
out1 += 2 * (dst_stride_frame - width);
|
||||
out2 += 2 * (dst_stride_frame - width);
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
|
||||
dst_stride_frame = -dst_stride_frame;
|
||||
}
|
||||
#else
|
||||
for (; i < (height >> 1);i++) {
|
||||
int32 width__ = (width >> 4);
|
||||
_asm
|
||||
{
|
||||
;pusha
|
||||
mov eax, DWORD PTR [in1] ;1939.33
|
||||
mov ecx, DWORD PTR [in2] ;1939.33
|
||||
mov ebx, DWORD PTR [src_u] ;1939.33
|
||||
mov edx, DWORD PTR [src_v] ;1939.33
|
||||
loop0:
|
||||
movq xmm6, QWORD PTR [ebx] ;src_u
|
||||
movq xmm0, QWORD PTR [edx] ;src_v
|
||||
punpcklbw xmm6, xmm0 ;src_u, src_v mix
|
||||
movdqa xmm1, xmm6
|
||||
movdqa xmm2, xmm6
|
||||
movdqa xmm4, xmm6
|
||||
|
||||
movdqu xmm3, XMMWORD PTR [eax] ;in1
|
||||
punpcklbw xmm1, xmm3 ;src_u, in1, src_v
|
||||
mov esi, DWORD PTR [out1]
|
||||
movdqu XMMWORD PTR [esi], xmm1 ;write to out1
|
||||
|
||||
movdqu xmm5, XMMWORD PTR [ecx] ;in2
|
||||
punpcklbw xmm2, xmm5 ;src_u, in2, src_v
|
||||
mov edi, DWORD PTR [out2]
|
||||
movdqu XMMWORD PTR [edi], xmm2 ;write to out2
|
||||
|
||||
punpckhbw xmm4, xmm3 ;src_u, in1, src_v again
|
||||
movdqu XMMWORD PTR [esi+16], xmm4 ;write to out1 again
|
||||
add esi, 32
|
||||
mov DWORD PTR [out1], esi
|
||||
|
||||
punpckhbw xmm6, xmm5 ;src_u, in2, src_v again
|
||||
movdqu XMMWORD PTR [edi+16], xmm6 ;write to out2 again
|
||||
add edi, 32
|
||||
mov DWORD PTR [out2], edi
|
||||
|
||||
add ebx, 8
|
||||
add edx, 8
|
||||
add eax, 16
|
||||
add ecx, 16
|
||||
|
||||
mov esi, DWORD PTR [width__]
|
||||
sub esi, 1
|
||||
mov DWORD PTR [width__], esi
|
||||
jg loop0
|
||||
|
||||
mov DWORD PTR [in1], eax ;1939.33
|
||||
mov DWORD PTR [in2], ecx ;1939.33
|
||||
mov DWORD PTR [src_u], ebx ;1939.33
|
||||
mov DWORD PTR [src_v], edx ;1939.33
|
||||
|
||||
;popa
|
||||
emms
|
||||
}
|
||||
in1 += width;
|
||||
in2 += width;
|
||||
out1 += 2 * (dst_stride_frame - width);
|
||||
out2 += 2 * (dst_stride_frame - width);
|
||||
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_frame, int width);
|
||||
I42xToUYVYRow = I42xToUYVYRow_C;
|
||||
#if defined(HAS_I42XTOUYVYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
|
||||
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
|
||||
I42xToUYVYRow = I42xToUYVYRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
|
||||
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v,
|
||||
dst_frame + dst_stride_frame, width);
|
||||
src_y += src_stride_y * 2;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
dst_frame += dst_stride_frame * 2;
|
||||
}
|
||||
if (height & 1) {
|
||||
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int I420ToV210(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height) {
|
||||
if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required
|
||||
src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
|
||||
dst_stride_frame = -dst_stride_frame;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix);
|
||||
UYVYToV210Row = UYVYToV210Row_C;
|
||||
|
||||
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_frame, int width);
|
||||
I42xToUYVYRow = I42xToUYVYRow_C;
|
||||
#if defined(HAS_I42XTOUYVYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
|
||||
I42xToUYVYRow = I42xToUYVYRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
I42xToUYVYRow(src_y, src_u, src_v, row, width);
|
||||
UYVYToV210Row(row, dst_frame, width);
|
||||
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
|
||||
UYVYToV210Row(row, dst_frame + dst_stride_frame, width);
|
||||
|
||||
src_y += src_stride_y * 2;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
dst_frame += dst_stride_frame * 2;
|
||||
}
|
||||
if (height & 1) {
|
||||
I42xToUYVYRow(src_y, src_u, src_v, row, width);
|
||||
UYVYToV210Row(row, dst_frame, width);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -467,56 +647,6 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// gcc provided macros
|
||||
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define LIBYUV_LITTLE_ENDIAN
|
||||
#endif
|
||||
// Visual C for x86 defines these
|
||||
#elif defined(_M_X64) || defined(_M_IX86)
|
||||
#define LIBYUV_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#ifdef LIBYUV_LITTLE_ENDIAN
|
||||
#define READWORD(p) (*((uint32*) (p)))
|
||||
#else
|
||||
uint32 READWORD(const uint8* p) {
|
||||
return (uint32) p[0] |
|
||||
((uint32) (p[1]) << 8) |
|
||||
((uint32) (p[2]) << 16) |
|
||||
((uint32) (p[3]) << 24);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Must be multiple of 6 pixels. Will over convert to handle remainder.
|
||||
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
|
||||
void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
|
||||
for (int x = 0; x < width; x += 6) {
|
||||
uint32 w = READWORD(src_v210 + 0);
|
||||
dst_uyvy[0] = (w >> 2) & 0xff;
|
||||
dst_uyvy[1] = (w >> 12) & 0xff;
|
||||
dst_uyvy[2] = (w >> 22) & 0xff;
|
||||
|
||||
w = READWORD(src_v210 + 4);
|
||||
dst_uyvy[3] = (w >> 2) & 0xff;
|
||||
dst_uyvy[4] = (w >> 12) & 0xff;
|
||||
dst_uyvy[5] = (w >> 22) & 0xff;
|
||||
|
||||
w = READWORD(src_v210 + 8);
|
||||
dst_uyvy[6] = (w >> 2) & 0xff;
|
||||
dst_uyvy[7] = (w >> 12) & 0xff;
|
||||
dst_uyvy[8] = (w >> 22) & 0xff;
|
||||
|
||||
w = READWORD(src_v210 + 12);
|
||||
dst_uyvy[9] = (w >> 2) & 0xff;
|
||||
dst_uyvy[10] = (w >> 12) & 0xff;
|
||||
dst_uyvy[11] = (w >> 22) & 0xff;
|
||||
|
||||
dst_uyvy += 12;
|
||||
src_v210 += 16;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert V210 to I420.
|
||||
// V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels.
|
||||
// With is multiple of 48.
|
||||
@ -525,7 +655,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
if (width * 16 / 6 > kMaxStride) { // row buffer is required
|
||||
if (width * 2 * 2 > kMaxStride) { // 2 rows of UYVY is required
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -1098,9 +1228,11 @@ int ConvertToI420(const uint8* sample, size_t sample_size,
|
||||
dst_width, inv_dst_height);
|
||||
break;
|
||||
case FOURCC_V210:
|
||||
// TODO(fbarchard): Confirm stride is 16 bytes per 6 pixels.
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 16 / 6;
|
||||
V210ToI420(src, aligned_src_width * 16 / 6,
|
||||
// stride is multiple of 48 pixels (128 bytes).
|
||||
// pixels come in groups of 6 = 16 bytes
|
||||
src = sample + (aligned_src_width + 47) / 48 * 128 * crop_y +
|
||||
crop_x / 6 * 16;
|
||||
V210ToI420(src, (aligned_src_width + 47) / 48 * 128,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
|
||||
@ -49,6 +49,15 @@ int ConvertFromI420(const uint8* y, int y_stride,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
break;
|
||||
case FOURCC_V210:
|
||||
I420ToV210(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride :
|
||||
(width + 47) / 48 * 128,
|
||||
width, height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
I420ToRGB565(y, y_stride,
|
||||
u, u_stride,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user