mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
Remove I411 support.
YUV 411 is very uncommon format. Remove support. Update documentation to reflect that 411 is deprecated. Simplify tests for YUV to only test with the new side by side YUV but keep old 3 plane test around with a macro for now. BUG=libyuv:645 R=kjellander@chromium.org Review URL: https://codereview.chromium.org/2406123002 .
This commit is contained in:
parent
0071f46a1f
commit
d363ea6527
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1623
|
||||
Version: 1624
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -37,20 +37,18 @@ This is how OSX formats map to libyuv
|
||||
The following is extracted from video_common.h as a complete list of formats supported by libyuv.
|
||||
|
||||
enum FourCC {
|
||||
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
|
||||
// 8 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
|
||||
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
|
||||
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
|
||||
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
|
||||
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
|
||||
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
|
||||
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
|
||||
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
|
||||
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
|
||||
|
||||
// 2 Secondary YUV formats: row biplanar.
|
||||
// 1 Secondary YUV formats: row biplanar.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
|
||||
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
|
||||
|
||||
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
|
||||
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
|
||||
@ -104,7 +102,7 @@ The following is extracted from video_common.h as a complete list of formats sup
|
||||
|
||||
# Planar YUV
|
||||
The following formats contains a full size Y plane followed by 1 or 2
|
||||
planes for UV: I420, I422, I444, I411, I400, NV21, NV12, I400
|
||||
planes for UV: I420, I422, I444, I400, NV21, NV12, I400
|
||||
The size (subsampling) of the UV varies.
|
||||
I420, NV12 and NV21 are half width, half height
|
||||
I422, NV16 and NV61 are half width, full height
|
||||
|
||||
@ -45,16 +45,6 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert I411 to I420.
|
||||
LIBYUV_API
|
||||
int I411ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Copy I420 to I420.
|
||||
#define I420ToI420 I420Copy
|
||||
LIBYUV_API
|
||||
|
||||
@ -82,14 +82,6 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
|
||||
// Convert I411 to ARGB.
|
||||
LIBYUV_API
|
||||
int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
|
||||
@ -41,15 +41,6 @@ int I420ToI444(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI411(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
|
||||
LIBYUV_API
|
||||
int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
|
||||
@ -123,14 +123,6 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To I411.
|
||||
LIBYUV_API
|
||||
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB to J400. (JPeg full range).
|
||||
LIBYUV_API
|
||||
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
@ -37,7 +37,6 @@ static const uint32 kUnknownDataSize = 0xFFFFFFFF;
|
||||
enum JpegSubsamplingType {
|
||||
kJpegYuv420,
|
||||
kJpegYuv422,
|
||||
kJpegYuv411,
|
||||
kJpegYuv444,
|
||||
kJpegYuv400,
|
||||
kJpegUnknown
|
||||
|
||||
@ -176,9 +176,6 @@ extern "C" {
|
||||
// caveat: clangcl uses row_win.cc which works.
|
||||
#if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \
|
||||
!defined(__i386__) || defined(_MSC_VER)
|
||||
// TODO(fbarchard): fix build error on x86 debug
|
||||
// https://code.google.com/p/libyuv/issues/detail?id=524
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
// TODO(fbarchard): fix build error on android_full_debug=1
|
||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
@ -208,7 +205,6 @@ extern "C" {
|
||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#endif
|
||||
#define HAS_I411TOARGBROW_AVX2
|
||||
#define HAS_I422TOARGB1555ROW_AVX2
|
||||
#define HAS_I422TOARGB4444ROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
@ -281,7 +277,6 @@ extern "C" {
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORGB565DITHERROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVJROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
@ -292,7 +287,6 @@ extern "C" {
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_I422ALPHATOARGBROW_NEON
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
@ -593,12 +587,6 @@ void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -668,8 +656,6 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
|
||||
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
|
||||
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
@ -758,8 +744,6 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
@ -810,8 +794,6 @@ void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
|
||||
|
||||
void ARGBToUV444Row_C(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUV411Row_C(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
|
||||
@ -1066,12 +1048,6 @@ void I422AlphaToARGBRow_C(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
@ -1193,18 +1169,6 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
@ -1349,18 +1313,6 @@ void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
@ -1588,12 +1540,6 @@ void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToRGBARow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1623
|
||||
#define LIBYUV_VERSION 1624
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -49,18 +49,18 @@ extern "C" {
|
||||
// Secondary formats are converted in 2 steps.
|
||||
// Auxilliary formats call primary converters.
|
||||
enum FourCC {
|
||||
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
|
||||
// 8 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
|
||||
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
|
||||
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
|
||||
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'), // deprecated.
|
||||
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
|
||||
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
|
||||
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
|
||||
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
|
||||
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
|
||||
|
||||
// 2 Secondary YUV formats: row biplanar.
|
||||
// 1 Secondary YUV format: row biplanar.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
|
||||
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
|
||||
|
||||
|
||||
@ -137,27 +137,6 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// 411 chroma is 1/4 width, 1x height
|
||||
// 420 chroma is 1/2 width, 1/2 height
|
||||
LIBYUV_API
|
||||
int I411ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
const int src_uv_width = SUBSAMPLE(width, 3, 2);
|
||||
return I4xxToI420(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
width, height,
|
||||
src_uv_width, height);
|
||||
}
|
||||
|
||||
// I400 is greyscale typically used in MJPG
|
||||
LIBYUV_API
|
||||
int I400ToI420(const uint8* src_y, int src_stride_y,
|
||||
|
||||
@ -487,75 +487,6 @@ int J444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I411 to ARGB.
|
||||
LIBYUV_API
|
||||
int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I411ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I411ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 4 == width &&
|
||||
src_stride_v * 4 == width &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_I411TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I411ToARGBRow = I411ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I411TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I411ToARGBRow = I411ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I411ToARGBRow = I411ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I411TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I411ToARGBRow = I411ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I411ToARGBRow = I411ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
|
||||
@ -104,28 +104,6 @@ int I420ToI444(const uint8* src_y, int src_stride_y,
|
||||
dst_uv_width, dst_uv_height);
|
||||
}
|
||||
|
||||
// 420 chroma is 1/2 width, 1/2 height
|
||||
// 411 chroma is 1/4 width, 1x height
|
||||
LIBYUV_API
|
||||
int I420ToI411(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
const int dst_uv_width = (Abs(width) + 3) >> 2;
|
||||
const int dst_uv_height = Abs(height);
|
||||
return I420ToI4xx(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
width, height,
|
||||
dst_uv_width, dst_uv_height);
|
||||
}
|
||||
|
||||
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
|
||||
LIBYUV_API
|
||||
int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
@ -900,7 +878,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
|
||||
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width); // NOLINT
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1113,20 +1091,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
|
||||
width, height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (width + 3) / 4;
|
||||
uint8* dst_u = dst_sample + width * height;
|
||||
uint8* dst_v = dst_u + quarterwidth * height;
|
||||
r = I420ToI411(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample, width,
|
||||
dst_u, quarterwidth,
|
||||
dst_v, quarterwidth,
|
||||
width, height);
|
||||
break;
|
||||
}
|
||||
|
||||
// Formats not supported - MJPG, biplanar, some rgb formats.
|
||||
default:
|
||||
return -1; // unknown fourcc - return failure code.
|
||||
|
||||
@ -181,79 +181,6 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ARGB little endian (bgra in memory) to I411
|
||||
LIBYUV_API
|
||||
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width) = ARGBToUV411Row_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_y == width &&
|
||||
dst_stride_u * 4 == width &&
|
||||
dst_stride_v * 4 == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
|
||||
}
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToYRow = ARGBToYRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUV411ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToUV411Row(src_argb, dst_u, dst_v, width);
|
||||
ARGBToYRow(src_argb, dst_y, width);
|
||||
src_argb += src_stride_argb;
|
||||
dst_y += dst_stride_y;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
@ -912,7 +839,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width); /* NOLINT */
|
||||
src_argb += src_stride_argb;
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
}
|
||||
|
||||
@ -86,24 +86,6 @@ static void JpegI444ToI420(void* opaque,
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI411ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I411ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI400ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
@ -193,17 +175,6 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
|
||||
// YUV411
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
@ -213,7 +184,7 @@ int MJPGToI420(const uint8* sample,
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
|
||||
} else {
|
||||
// TODO(fbarchard): Implement conversion for any other colorspace/sample
|
||||
// factors that occur in practice. 411 is supported by libjpeg
|
||||
// factors that occur in practice.
|
||||
// ERROR: Unable to convert MJPEG frame because format is not supported
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
@ -272,20 +243,6 @@ static void JpegI444ToARGB(void* opaque,
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI411ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I411ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI400ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
@ -355,17 +312,6 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
|
||||
// YUV411
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
@ -375,7 +321,7 @@ int MJPGToARGB(const uint8* sample,
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
|
||||
} else {
|
||||
// TODO(fbarchard): Implement conversion for any other colorspace/sample
|
||||
// factors that occur in practice. 411 is supported by libjpeg
|
||||
// factors that occur in practice.
|
||||
// ERROR: Unable to convert MJPEG frame because format is not supported
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
|
||||
@ -67,7 +67,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
|
||||
if (need_buf) {
|
||||
int argb_size = crop_width * 4 * abs_crop_height;
|
||||
rotate_buffer = (uint8*)malloc(argb_size);
|
||||
rotate_buffer = (uint8*)malloc(argb_size); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
@ -262,20 +262,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (src_width + 3) / 4;
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u = sample + src_width * abs_src_height +
|
||||
quarterwidth * crop_y + crop_x / 4;
|
||||
const uint8* src_v = sample + src_width * abs_src_height +
|
||||
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
|
||||
r = I411ToARGB(src_y, src_width,
|
||||
src_u, quarterwidth,
|
||||
src_v, quarterwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToARGB(sample, sample_size,
|
||||
|
||||
@ -70,7 +70,7 @@ int ConvertToI420(const uint8* sample,
|
||||
if (need_buf) {
|
||||
int y_size = crop_width * abs_crop_height;
|
||||
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
|
||||
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
|
||||
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
@ -286,22 +286,6 @@ int ConvertToI420(const uint8* sample,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (src_width + 3) / 4;
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u = sample + src_width * abs_src_height +
|
||||
quarterwidth * crop_y + crop_x / 4;
|
||||
const uint8* src_v = sample + src_width * abs_src_height +
|
||||
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
|
||||
r = I411ToI420(src_y, src_width,
|
||||
src_u, quarterwidth,
|
||||
src_v, quarterwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToI420(sample, sample_size,
|
||||
|
||||
@ -127,9 +127,6 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_SSSE3
|
||||
ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
|
||||
@ -150,9 +147,6 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB4444ROW_AVX2
|
||||
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
@ -165,7 +159,6 @@ ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
|
||||
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
@ -720,21 +713,6 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
|
||||
ANY_SIMD(src_ptr, dst_u, dst_v, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||
/* repeat last 4 - 12 bytes for 411 subsampler */ \
|
||||
if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \
|
||||
memcpy(temp + SS(r, UVSHIFT) * BPP, \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
|
||||
memcpy(temp + SS(r, UVSHIFT) * BPP + BPP, \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2); \
|
||||
} \
|
||||
if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \
|
||||
memcpy(temp + SS(r, UVSHIFT) * BPP, \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2); \
|
||||
} \
|
||||
if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \
|
||||
memcpy(temp + SS(r, UVSHIFT) * BPP, \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \
|
||||
} \
|
||||
ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
|
||||
memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
|
||||
memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
|
||||
@ -765,7 +743,6 @@ ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_NEON
|
||||
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
|
||||
ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
|
||||
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
|
||||
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
|
||||
#endif
|
||||
|
||||
@ -636,41 +636,6 @@ void ARGBToUV444Row_C(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToUV411Row_C(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 3; x += 4) {
|
||||
uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
|
||||
uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
|
||||
uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
src_argb += 16;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
// Odd width handling mimics 'any' function which replicates last pixel.
|
||||
if ((width & 3) == 3) {
|
||||
uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[8]) >> 2;
|
||||
uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[9]) >> 2;
|
||||
uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[10]) >> 2;
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
} else if ((width & 3) == 2) {
|
||||
uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
|
||||
uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
|
||||
uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
} else if ((width & 3) == 1) {
|
||||
uint8 ab = src_argb[0];
|
||||
uint8 ag = src_argb[1];
|
||||
uint8 ar = src_argb[2];
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
@ -1530,48 +1495,6 @@ void I422ToRGB565Row_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I411ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 3; x += 4) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
YuvPixel(src_y[2], src_u[0], src_v[0],
|
||||
rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants);
|
||||
rgb_buf[11] = 255;
|
||||
YuvPixel(src_y[3], src_u[0], src_v[0],
|
||||
rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants);
|
||||
rgb_buf[15] = 255;
|
||||
src_y += 4;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 16; // Advance 4 pixels.
|
||||
}
|
||||
if (width & 2) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0],
|
||||
rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
void NV12ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* rgb_buf,
|
||||
|
||||
@ -1526,26 +1526,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
||||
"movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \
|
||||
"lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n"
|
||||
|
||||
// Read 2 UV from 411, upsample to 8 UV.
|
||||
// reading 4 bytes is an msan violation.
|
||||
// "movd " MEMACCESS([u_buf]) ",%%xmm0 \n"
|
||||
// MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)
|
||||
// pinsrw fails with drmemory
|
||||
// __asm pinsrw xmm0, [esi], 0 /* U */
|
||||
// __asm pinsrw xmm1, [esi + edi], 0 /* V */
|
||||
#define READYUV411_TEMP \
|
||||
"movzwl " MEMACCESS([u_buf]) ",%[temp] \n" \
|
||||
"movd %[temp],%%xmm0 \n" \
|
||||
MEMOPARG(movzwl, 0x00, [u_buf], [v_buf], 1, [temp]) " \n" \
|
||||
"movd %[temp],%%xmm1 \n" \
|
||||
"lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
|
||||
"punpcklbw %%xmm1,%%xmm0 \n" \
|
||||
"punpcklwd %%xmm0,%%xmm0 \n" \
|
||||
"punpckldq %%xmm0,%%xmm0 \n" \
|
||||
"movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||
"punpcklbw %%xmm4,%%xmm4 \n" \
|
||||
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 4 UV from NV12, upsample to 8 UV
|
||||
#define READNV12 \
|
||||
"movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
|
||||
@ -1804,42 +1784,6 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I422ALPHATOARGBROW_SSSE3
|
||||
|
||||
#ifdef HAS_I411TOARGBROW_SSSE3
|
||||
void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int temp;
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV411_TEMP
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"subl $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[temp]"=&r"(temp), // %[temp]
|
||||
#if defined(__i386__) && defined(__pic__)
|
||||
[width]"+m"(width) // %[width]
|
||||
#else
|
||||
[width]"+rm"(width) // %[width]
|
||||
#endif
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14 YUVTORGB_REGS
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
@ -2013,20 +1957,6 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
"vpermq $0xd8,%%ymm5,%%ymm5 \n" \
|
||||
"lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
|
||||
|
||||
// Read 4 UV from 411, upsample to 16 UV.
|
||||
#define READYUV411_AVX2 \
|
||||
"vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
||||
MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
||||
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
|
||||
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
|
||||
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
|
||||
"vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \
|
||||
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||
"vpermq $0xd8,%%ymm4,%%ymm4 \n" \
|
||||
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
|
||||
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 8 UV from NV12, upsample to 16 UV.
|
||||
#define READNV12_AVX2 \
|
||||
"vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
|
||||
@ -2163,39 +2093,6 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I444TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV411_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREARGB_AVX2
|
||||
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I411TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
|
||||
@ -30,17 +30,6 @@ extern "C" {
|
||||
MEMACCESS(2) \
|
||||
"vld1.32 {d2[1]}, [%2]! \n"
|
||||
|
||||
// Read 8 Y, 2 U and 2 V from 422
|
||||
#define READYUV411 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
MEMACCESS(1) \
|
||||
"vld1.16 {d2[0]}, [%1]! \n" \
|
||||
MEMACCESS(2) \
|
||||
"vld1.16 {d2[1]}, [%2]! \n" \
|
||||
"vmov.u8 d3, d2 \n" \
|
||||
"vzip.u8 d2, d3 \n"
|
||||
|
||||
// Read 8 Y, 8 U and 8 V from 444
|
||||
#define READYUV444 \
|
||||
MEMACCESS(0) \
|
||||
@ -229,36 +218,6 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"vmov.u8 d23, #255 \n"
|
||||
"1: \n"
|
||||
READYUV411
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
MEMACCESS(3)
|
||||
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1383,69 +1342,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
|
||||
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
|
||||
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels.
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels.
|
||||
"vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts.
|
||||
"vpadd.u16 d1, d8, d9 \n" // B
|
||||
"vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts.
|
||||
"vpadd.u16 d3, d10, d11 \n" // G
|
||||
"vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts.
|
||||
"vpadd.u16 d5, d12, d13 \n" // R
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
|
||||
"subs %3, %3, #32 \n" // 32 processed per loop.
|
||||
"vmul.s16 q8, q0, q10 \n" // B
|
||||
"vmls.s16 q8, q1, q11 \n" // G
|
||||
"vmls.s16 q8, q2, q12 \n" // R
|
||||
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
|
||||
"vmul.s16 q9, q2, q10 \n" // R
|
||||
"vmls.s16 q9, q1, q14 \n" // G
|
||||
"vmls.s16 q9, q0, q13 \n" // B
|
||||
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
|
||||
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
|
||||
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"vmul.s16 q8, " #QB ", q10 \n" /* B */ \
|
||||
|
||||
@ -27,16 +27,6 @@ extern "C" {
|
||||
MEMACCESS(2) \
|
||||
"ld1 {v1.s}[1], [%2], #4 \n"
|
||||
|
||||
// Read 8 Y, 2 U and 2 V from 422
|
||||
#define READYUV411 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
MEMACCESS(1) \
|
||||
"ld1 {v2.h}[0], [%1], #2 \n" \
|
||||
MEMACCESS(2) \
|
||||
"ld1 {v2.h}[1], [%2], #2 \n" \
|
||||
"zip1 v1.8b, v2.8b, v2.8b \n"
|
||||
|
||||
// Read 8 Y, 8 U and 8 V from 444
|
||||
#define READYUV444 \
|
||||
MEMACCESS(0) \
|
||||
@ -220,36 +210,6 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"movi v23.8b, #255 \n" /* A */
|
||||
"1: \n"
|
||||
READYUV411
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
MEMACCESS(3)
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_u), // %1
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
}
|
||||
|
||||
void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1395,57 +1355,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
|
||||
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
|
||||
|
||||
// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
|
||||
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
||||
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
|
||||
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||
MEMACCESS(0)
|
||||
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16.
|
||||
"uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
|
||||
"uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"addp v0.8h, v0.8h, v4.8h \n" // B 16 shorts -> 8 shorts.
|
||||
"addp v1.8h, v1.8h, v5.8h \n" // G 16 shorts -> 8 shorts.
|
||||
"addp v2.8h, v2.8h, v6.8h \n" // R 16 shorts -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
|
||||
"subs %w3, %w3, #32 \n" // 32 processed per loop.
|
||||
"mul v3.8h, v0.8h, v20.8h \n" // B
|
||||
"mls v3.8h, v1.8h, v21.8h \n" // G
|
||||
"mls v3.8h, v2.8h, v22.8h \n" // R
|
||||
"add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
|
||||
"mul v4.8h, v2.8h, v20.8h \n" // R
|
||||
"mls v4.8h, v1.8h, v24.8h \n" // G
|
||||
"mls v4.8h, v0.8h, v23.8h \n" // B
|
||||
"add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned
|
||||
"uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U
|
||||
"uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V
|
||||
MEMACCESS(1)
|
||||
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
|
||||
MEMACCESS(2)
|
||||
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25"
|
||||
);
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"mul v3.8h, " #QB ",v20.8h \n" /* B */ \
|
||||
|
||||
@ -1969,21 +1969,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
__asm lea ebp, [ebp + 16] \
|
||||
}
|
||||
|
||||
// Read 4 UV from 411, upsample to 16 UV.
|
||||
#define READYUV411_AVX2 __asm { \
|
||||
__asm vmovd xmm0, dword ptr [esi] /* U */ \
|
||||
__asm vmovd xmm1, dword ptr [esi + edi] /* V */ \
|
||||
__asm lea esi, [esi + 4] \
|
||||
__asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
|
||||
__asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
|
||||
__asm vpermq ymm0, ymm0, 0xd8 \
|
||||
__asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \
|
||||
__asm vmovdqu xmm4, [eax] /* Y */ \
|
||||
__asm vpermq ymm4, ymm4, 0xd8 \
|
||||
__asm vpunpcklbw ymm4, ymm4, ymm4 \
|
||||
__asm lea eax, [eax + 16] \
|
||||
}
|
||||
|
||||
// Read 8 UV from NV12, upsample to 16 UV.
|
||||
#define READNV12_AVX2 __asm { \
|
||||
__asm vmovdqu xmm0, [esi] /* UV */ \
|
||||
@ -2198,46 +2183,6 @@ void I444ToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I444TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
void I411ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // abgr
|
||||
mov ebx, [esp + 12 + 20] // yuvconstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV411_AVX2
|
||||
YUVTORGB_AVX2(ebx)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I411TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_NV12TOARGBROW_AVX2
|
||||
// 16 pixels.
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
@ -2451,24 +2396,6 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
||||
__asm lea ebp, [ebp + 8] \
|
||||
}
|
||||
|
||||
// Read 2 UV from 411, upsample to 8 UV.
|
||||
// drmemory fails with memory fault if pinsrw used. libyuv bug: 525
|
||||
// __asm pinsrw xmm0, [esi], 0 /* U */
|
||||
// __asm pinsrw xmm1, [esi + edi], 0 /* V */
|
||||
#define READYUV411_EBX __asm { \
|
||||
__asm movzx ebx, word ptr [esi] /* U */ \
|
||||
__asm movd xmm0, ebx \
|
||||
__asm movzx ebx, word ptr [esi + edi] /* V */ \
|
||||
__asm movd xmm1, ebx \
|
||||
__asm lea esi, [esi + 2] \
|
||||
__asm punpcklbw xmm0, xmm1 /* UV */ \
|
||||
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
||||
__asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
|
||||
__asm movq xmm4, qword ptr [eax] \
|
||||
__asm punpcklbw xmm4, xmm4 \
|
||||
__asm lea eax, [eax + 8] \
|
||||
}
|
||||
|
||||
// Read 4 UV from NV12, upsample to 8 UV.
|
||||
#define READNV12 __asm { \
|
||||
__asm movq xmm0, qword ptr [esi] /* UV */ \
|
||||
@ -2819,46 +2746,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
// Similar to I420 but duplicate UV once more.
|
||||
__declspec(naked)
|
||||
void I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // Y
|
||||
mov esi, [esp + 16 + 8] // U
|
||||
mov edi, [esp + 16 + 12] // V
|
||||
mov edx, [esp + 16 + 16] // abgr
|
||||
mov ebp, [esp + 16 + 20] // yuvconstants
|
||||
mov ecx, [esp + 16 + 24] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV411_EBX
|
||||
YUVTORGB(ebp)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
|
||||
@ -166,10 +166,8 @@ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
TESTPLANARTOP(I420, 2, 2, I420, 2, 2)
|
||||
TESTPLANARTOP(I422, 2, 1, I420, 2, 2)
|
||||
TESTPLANARTOP(I444, 1, 1, I420, 2, 2)
|
||||
TESTPLANARTOP(I411, 4, 1, I420, 2, 2)
|
||||
TESTPLANARTOP(I420, 2, 2, I422, 2, 1)
|
||||
TESTPLANARTOP(I420, 2, 2, I444, 1, 1)
|
||||
TESTPLANARTOP(I420, 2, 2, I411, 4, 1)
|
||||
TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2)
|
||||
TESTPLANARTOP(I422, 2, 1, I422, 2, 1)
|
||||
TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
|
||||
@ -655,7 +653,6 @@ TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
|
||||
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
|
||||
@ -819,6 +816,8 @@ TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
|
||||
|
||||
#ifdef DO_THREE_PLANES
|
||||
// Do 3 allocations for yuv. conventional but slower.
|
||||
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
W1280, DIFF, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
@ -894,10 +893,9 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
free_aligned_buffer_page_end(dst_v_opt); \
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
}
|
||||
|
||||
|
||||
#define TESTATOPLANARI2(FMT_A, BPP_A, YALIGN, FMT_PLANAR, \
|
||||
SUBSAMP_X, SUBSAMP_Y, W1280, DIFF, N, NEG, OFF) \
|
||||
#else
|
||||
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, \
|
||||
SUBSAMP_X, SUBSAMP_Y, W1280, DIFF, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||
@ -952,6 +950,7 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
free_aligned_buffer_page_end(dst_uv_opt); \
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
DIFF) \
|
||||
@ -962,9 +961,7 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, DIFF, _Invert, -, 0) \
|
||||
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, DIFF, _Opt, +, 0) \
|
||||
TESTATOPLANARI2(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, DIFF, _SxS, +, 0)
|
||||
benchmark_width_, DIFF, _Opt, +, 0)
|
||||
|
||||
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
|
||||
#if defined(__arm__) || defined (__aarch64__)
|
||||
@ -985,7 +982,6 @@ TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
|
||||
// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
|
||||
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
|
||||
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
|
||||
@ -1983,7 +1979,6 @@ TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I411, 4, 1, ARGB, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4)
|
||||
TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
|
||||
|
||||
@ -66,7 +66,6 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_I411, FOURCC_BPP_I411));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user