V210 fourcc support for AJA cards. Make UYVYToI420 use the 'Last16' method for 'Any'; likewise for YUY2, ARGB, BGRA and ABGR.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/388008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@171 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2012-02-14 00:11:15 +00:00
parent b95dbf2495
commit c8c8047d43
6 changed files with 169 additions and 55 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 170
Version: 171
License: BSD
License File: LICENSE

View File

@ -133,6 +133,13 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert V210 to I420.
// V210 is a 10 bit version of UYVY that stores 6 pixels in 16 bytes.
// A negative height inverts the image.
// Returns 0 on success, or -1 if the width is too large for the internal
// row buffer.
int V210ToI420(const uint8* src_v210, int src_stride_v210,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height);
// Convert I420 to ARGB.
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,

View File

@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 170
#define LIBYUV_VERSION 171
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -50,6 +50,7 @@ enum FourCC {
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
FOURCC_V210 = FOURCC('V', '2', '1', '0'),
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),

View File

@ -361,7 +361,6 @@ int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
// READSAFE_ALWAYS - enables read ahead on systems without memory exceptions
// or where buffers are padded by 64 bytes.
#define READSAFE_ODDHEIGHT
static bool TestReadSafe(const uint8* src_yuy2, int src_stride_yuy2,
@ -374,7 +373,8 @@ static bool TestReadSafe(const uint8* src_yuy2, int src_stride_yuy2,
#elif defined(READSAFE_NEVER)
return false;
#elif defined(READSAFE_ODDHEIGHT)
if (src_stride_yuy2 >= 0 && (height & 1) && width * bpp >= overread) {
if (!(width & 15) ||
(src_stride_yuy2 >= 0 && (height & 1) && width * bpp >= overread)) {
return true;
}
return false;
@ -404,14 +404,14 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
src_stride_yuy2 = -src_stride_yuy2;
}
void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
uint8* dst_u, uint8* dst_v, int pix);
void (*YUY2ToYRow)(const uint8* src_yuy2,
uint8* dst_y, int pix);
uint8* dst_y, int pix);
YUY2ToYRow = YUY2ToYRow_C;
YUY2ToUVRow = YUY2ToUVRow_C;
#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (TestReadSafe(src_yuy2, src_stride_yuy2, width, height, 2, 32)) {
if (width > 16) {
YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
}
@ -437,8 +437,8 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
dst_v += dst_stride_v;
}
if (height & 1) {
YUY2ToUVRow_C(src_yuy2, 0, dst_u, dst_v, width);
YUY2ToYRow_C(src_yuy2, dst_y, width);
YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width);
YUY2ToYRow(src_yuy2, dst_y, width);
}
return 0;
}
@ -456,14 +456,14 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
src_stride_uyvy = -src_stride_uyvy;
}
void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
uint8* dst_u, uint8* dst_v, int pix);
void (*UYVYToYRow)(const uint8* src_uyvy,
uint8* dst_y, int pix);
uint8* dst_y, int pix);
UYVYToYRow = UYVYToYRow_C;
UYVYToUVRow = UYVYToUVRow_C;
#if defined(HAS_UYVYTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (TestReadSafe(src_uyvy, src_stride_uyvy, width, height, 2, 32)) {
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
}
@ -489,8 +489,113 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
dst_v += dst_stride_v;
}
if (height & 1) {
UYVYToUVRow_C(src_uyvy, 0, dst_u, dst_v, width);
UYVYToYRow_C(src_uyvy, dst_y, width);
UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
UYVYToYRow(src_uyvy, dst_y, width);
}
return 0;
}
// Detect little-endian targets so READWORD can use a single 32-bit load.
// gcc provided macros
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif

// READWORD(p) yields the 32-bit value stored in little-endian byte order at
// byte pointer p.
#ifdef LIBYUV_LITTLE_ENDIAN
// Direct load. The cast keeps the pointee const so read-only buffers are not
// silently made writable.
// NOTE(review): this assumes the target tolerates unaligned 32-bit loads and
// the type-punned access - confirm for any new little-endian platform.
#define READWORD(p) (*((const uint32*) (p)))
#else
// Portable fallback: assemble the word one byte at a time. Internal linkage
// keeps this helper from leaking an all-caps symbol out of the file.
static inline uint32 READWORD(const uint8* p) {
  return (uint32) p[0] |
         ((uint32) (p[1]) << 8) |
         ((uint32) (p[2]) << 16) |
         ((uint32) (p[3]) << 24);
}
#endif
// Must be multiple of 6 pixels. Will over convert to handle remainder.
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
for (int x = 0; x < width; x += 6) {
uint32 w = READWORD(src_v210 + 0);
dst_uyvy[0] = (w >> 2) & 0xff;
dst_uyvy[1] = (w >> 12) & 0xff;
dst_uyvy[2] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 4);
dst_uyvy[3] = (w >> 2) & 0xff;
dst_uyvy[4] = (w >> 12) & 0xff;
dst_uyvy[5] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 8);
dst_uyvy[6] = (w >> 2) & 0xff;
dst_uyvy[7] = (w >> 12) & 0xff;
dst_uyvy[8] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 12);
dst_uyvy[9] = (w >> 2) & 0xff;
dst_uyvy[10] = (w >> 12) & 0xff;
dst_uyvy[11] = (w >> 22) & 0xff;
dst_uyvy += 12;
src_v210 += 16;
}
}
// Convert V210 to I420.
// V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels.
// Width is expected to be a multiple of 48.
//
// Each pair of source rows is first unpacked to 8 bit UYVY in a stack row
// buffer, then converted to Y, U and V planes with the UYVY row functions.
//
// src_v210, src_stride_v210: source image and its stride in bytes.
// dst_y/dst_u/dst_v and strides: destination I420 planes.
// width, height: image size in pixels; a negative height inverts the image.
// Returns 0 on success, -1 if the width does not fit the row buffer.
int V210ToI420(const uint8* src_v210, int src_stride_v210,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Row buffer is required. The product is evaluated in 64 bits so a very
  // large width cannot overflow int and slip past this guard.
  if (static_cast<long long>(width) * 16 / 6 > kMaxStride) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_v210 = src_v210 + (height - 1) * src_stride_v210;
    src_stride_v210 = -src_stride_v210;
  }
  // Two unpacked UYVY rows: the first at row, the second at row + kMaxStride.
  SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
  void (*V210ToUYVYRow)(const uint8* src_v210, uint8* dst_uyvy, int pix);
  V210ToUYVYRow = V210ToUYVYRow_C;

  void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
                      uint8* dst_u, uint8* dst_v, int pix);
  void (*UYVYToYRow)(const uint8* src_uyvy,
                     uint8* dst_y, int pix);
  UYVYToYRow = UYVYToYRow_C;
  UYVYToUVRow = UYVYToUVRow_C;
#if defined(HAS_UYVYTOYROW_SSE2)
  // SSE2 row functions are selected only when width is a multiple of 16.
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    UYVYToUVRow = UYVYToUVRow_SSE2;
    UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
    if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
      UYVYToYRow = UYVYToYRow_SSE2;
    }
  }
#endif
  // Process two source rows per iteration: one U/V row is produced from the
  // pair, and a Y row from each.
  for (int y = 0; y < height - 1; y += 2) {
    V210ToUYVYRow(src_v210, row, width);
    V210ToUYVYRow(src_v210 + src_stride_v210, row + kMaxStride, width);
    UYVYToUVRow(row, kMaxStride, dst_u, dst_v, width);
    UYVYToYRow(row, dst_y, width);
    UYVYToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
    src_v210 += src_stride_v210 * 2;
    dst_y += dst_stride_y * 2;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  // Odd height: the last row has no partner, so a stride of 0 makes the U/V
  // function use the same row twice.
  if (height & 1) {
    V210ToUYVYRow(src_v210, row, width);
    UYVYToUVRow(row, 0, dst_u, dst_v, width);
    UYVYToYRow(row, dst_y, width);
  }
  return 0;
}
@ -513,10 +618,8 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (TestReadSafe(src_argb, src_stride_argb, width, height, 4, 64)) {
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
}
if (IS_ALIGNED(width, 16)) {
@ -566,10 +669,8 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
BGRAToUVRow = BGRAToUVRow_C;
#if defined(HAS_BGRATOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (TestReadSafe(src_bgra, src_stride_bgra, width, height, 4, 64)) {
if (IS_ALIGNED(width, 2)) {
BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
}
if (width > 16) {
BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
BGRAToYRow = BGRAToYRow_Any_SSSE3;
}
if (IS_ALIGNED(width, 16)) {
@ -595,8 +696,8 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
dst_v += dst_stride_v;
}
if (height & 1) {
BGRAToUVRow_C(src_bgra, 0, dst_u, dst_v, width);
BGRAToYRow_C(src_bgra, dst_y, width);
BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width);
BGRAToYRow(src_bgra, dst_y, width);
}
return 0;
}
@ -619,10 +720,8 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
ABGRToUVRow = ABGRToUVRow_C;
#if defined(HAS_ABGRTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (TestReadSafe(src_abgr, src_stride_abgr, width, height, 4, 64)) {
if (IS_ALIGNED(width, 2)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
}
if (width > 16) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
}
if (IS_ALIGNED(width, 16)) {
@ -648,8 +747,8 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
dst_v += dst_stride_v;
}
if (height & 1) {
ABGRToUVRow_C(src_abgr, 0, dst_u, dst_v, width);
ABGRToYRow_C(src_abgr, dst_y, width);
ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
ABGRToYRow(src_abgr, dst_y, width);
}
return 0;
}
@ -686,7 +785,7 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (IS_ALIGNED(width, 2)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -751,7 +850,7 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (IS_ALIGNED(width, 2)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -816,7 +915,7 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (IS_ALIGNED(width, 2)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -881,7 +980,7 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (IS_ALIGNED(width, 2)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -947,7 +1046,7 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (IS_ALIGNED(width, 2)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -1026,6 +1125,15 @@ int ConvertToI420(const uint8* sample, size_t sample_size,
v, v_stride,
dst_width, inv_dst_height);
break;
case FOURCC_V210:
// TODO(fbarchard): Confirm stride is 16 bytes per 6 pixels.
src = sample + (aligned_src_width * crop_y + crop_x) * 16 / 6;
V210ToI420(src, aligned_src_width * 16 / 6,
y, y_stride,
u, u_stride,
v, v_stride,
dst_width, inv_dst_height);
break;
case FOURCC_24BG:
src = sample + (src_width * crop_y + crop_x) * 3;
RGB24ToI420(src, src_width * 3,

View File

@ -455,34 +455,32 @@ MAKEYUVANYRGB(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 2)
#ifdef HAS_ARGBTOYROW_SSSE3
#define MAKEANYTOYANY(NAMEANY, ARGBTOY) \
#define MAKEYANY(NAMEANY, ARGBTOY_SSE, BPP) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
SIMD_ALIGNED(uint8 row[kMaxStride]); \
ARGBTOY(src_argb, row, width); \
memcpy(dst_y, row, width); \
ARGBTOY_SSE(src_argb, dst_y, width - 16); \
ARGBTOY_SSE(src_argb + (width - 16) * BPP, dst_y + (width - 16), 16); \
}
MAKEANYTOYANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3)
MAKEANYTOYANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3)
MAKEANYTOYANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3)
MAKEANYTOYANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2)
MAKEANYTOYANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2)
MAKEYANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4)
MAKEYANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4)
MAKEYANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4)
MAKEYANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
MAKEYANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
#define MAKEANYTOUVANY(NAMEANY, ARGBTOUV) \
#define MAKEUVANY(NAMEANY, ARGBTOUV_SSE, ARGBTOUV_C, BPP) \
void NAMEANY(const uint8* src_argb0, int src_stride_argb, \
uint8* dst_u, uint8* dst_v, int width) { \
SIMD_ALIGNED(uint8 row[kMaxStride * 2]); \
ARGBTOUV(src_argb0, src_stride_argb, row, row + kMaxStride, width); \
int halfwidth = (width + 1) >> 1; \
memcpy(dst_u, row, halfwidth); \
memcpy(dst_v, row + kMaxStride, halfwidth); \
ARGBTOUV_SSE(src_argb0, src_stride_argb, dst_u, dst_v, width & ~15); \
ARGBTOUV_C(src_argb0 + (width & ~15) * BPP, src_stride_argb, \
dst_u + (width & ~15) / 2, dst_v + (width & ~15) / 2, \
width & 15); \
}
MAKEANYTOUVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3)
MAKEANYTOUVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3)
MAKEANYTOUVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3)
MAKEANYTOUVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2)
MAKEANYTOUVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2)
MAKEUVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
MAKEUVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
MAKEUVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
MAKEUVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
MAKEUVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
#endif
#ifdef __cplusplus