diff --git a/README.chromium b/README.chromium index a8a109ff8..a2ab5c27d 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1623 +Version: 1624 License: BSD License File: LICENSE diff --git a/docs/formats.md b/docs/formats.md index f3a35420f..cddfe027e 100644 --- a/docs/formats.md +++ b/docs/formats.md @@ -37,20 +37,18 @@ This is how OSX formats map to libyuv The following is extracted from video_common.h as a complete list of formats supported by libyuv. enum FourCC { - // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. + // 8 Primary YUV formats: 4 planar, 2 biplanar, 2 packed. FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), - FOURCC_I411 = FOURCC('I', '4', '1', '1'), FOURCC_I400 = FOURCC('I', '4', '0', '0'), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), - // 2 Secondary YUV formats: row biplanar. + // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), @@ -104,7 +102,7 @@ The following is extracted from video_common.h as a complete list of formats sup # Planar YUV The following formats contains a full size Y plane followed by 1 or 2 - planes for UV: I420, I422, I444, I411, I400, NV21, NV12, I400 + planes for UV: I420, I422, I444, I400, NV21, NV12 The size (subsampling) of the UV varies. 
I420, NV12 and NV21 are half width, half height I422, NV16 and NV61 are half width, full height diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index d44485847..5a9368dd6 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -45,16 +45,6 @@ int I422ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert I411 to I420. -LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Copy I420 to I420. #define I420ToI420 I420Copy LIBYUV_API diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index dc03ac8d5..5d8de59fc 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -82,14 +82,6 @@ int I444ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height); -// Convert I411 to ARGB. -LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - // Convert I420 with Alpha to preattenuated ARGB. 
LIBYUV_API int I420AlphaToARGB(const uint8* src_y, int src_stride_y, diff --git a/include/libyuv/convert_from.h b/include/libyuv/convert_from.h index 59c40474f..24f11a52e 100644 --- a/include/libyuv/convert_from.h +++ b/include/libyuv/convert_from.h @@ -41,15 +41,6 @@ int I420ToI444(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. LIBYUV_API int I400Copy(const uint8* src_y, int src_stride_y, diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index 8d7f02f8c..8bb09f617 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -123,14 +123,6 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert ARGB To I411. -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Convert ARGB to J400. (JPeg full range). 
LIBYUV_API int ARGBToJ400(const uint8* src_argb, int src_stride_argb, diff --git a/include/libyuv/mjpeg_decoder.h b/include/libyuv/mjpeg_decoder.h index 4975bae5b..d654ee06a 100644 --- a/include/libyuv/mjpeg_decoder.h +++ b/include/libyuv/mjpeg_decoder.h @@ -37,7 +37,6 @@ static const uint32 kUnknownDataSize = 0xFFFFFFFF; enum JpegSubsamplingType { kJpegYuv420, kJpegYuv422, - kJpegYuv411, kJpegYuv444, kJpegYuv400, kJpegUnknown diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 1a17cbf4e..fc89de3d6 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -176,9 +176,6 @@ extern "C" { // caveat: clangcl uses row_win.cc which works. #if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \ !defined(__i386__) || defined(_MSC_VER) -// TODO(fbarchard): fix build error on x86 debug -// https://code.google.com/p/libyuv/issues/detail?id=524 -#define HAS_I411TOARGBROW_SSSE3 // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_SSSE3 @@ -208,7 +205,6 @@ extern "C" { // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_AVX2 #endif -#define HAS_I411TOARGBROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -281,7 +277,6 @@ extern "C" { #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOUV411ROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON @@ -292,7 +287,6 @@ extern "C" { #define HAS_BGRATOYROW_NEON #define HAS_COPYROW_NEON #define HAS_I400TOARGBROW_NEON -#define HAS_I411TOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON @@ -593,12 +587,6 @@ void I422ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void 
I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -668,8 +656,6 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, @@ -758,8 +744,6 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, uint8* dst_u, uint8* dst_v, int width); void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, @@ -810,8 +794,6 @@ void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, void ARGBToUV444Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); @@ -1066,12 +1048,6 @@ void I422AlphaToARGBRow_C(const uint8* y_buf, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* 
dst_argb, @@ -1193,18 +1169,6 @@ void I422ToARGBRow_SSSE3(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I411ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, @@ -1349,18 +1313,6 @@ void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I411ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, @@ -1588,12 +1540,6 @@ void I422AlphaToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index bb558bd7f..d237d78ab 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1623 +#define LIBYUV_VERSION 1624 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h index cb425426a..084201af5 100644 --- 
a/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h @@ -49,18 +49,18 @@ extern "C" { // Secondary formats are converted in 2 steps. // Auxilliary formats call primary converters. enum FourCC { - // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. + // 8 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), - FOURCC_I411 = FOURCC('I', '4', '1', '1'), + FOURCC_I411 = FOURCC('I', '4', '1', '1'), // deprecated. FOURCC_I400 = FOURCC('I', '4', '0', '0'), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), - // 2 Secondary YUV formats: row biplanar. + // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated. diff --git a/source/convert.cc b/source/convert.cc index ed3cd7fd0..ba07b3959 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -137,27 +137,6 @@ int I444ToI420(const uint8* src_y, int src_stride_y, width, height); } -// 411 chroma is 1/4 width, 1x height -// 420 chroma is 1/2 width, 1/2 height -LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int src_uv_width = SUBSAMPLE(width, 3, 2); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); -} - // I400 is greyscale typically used in MJPG LIBYUV_API int I400ToI420(const uint8* src_y, int src_stride_y, diff --git a/source/convert_argb.cc b/source/convert_argb.cc index fb9582d62..947d0fc64 100644 --- 
a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -487,75 +487,6 @@ int J444ToARGB(const uint8* src_y, int src_stride_y, width, height); } -// Convert I411 to ARGB. -LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*I411ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I411ToARGBRow_C; - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 4 == width && - src_stride_v * 4 == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; - } -#if defined(HAS_I411TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I411ToARGBRow = I411ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I411ToARGBRow = I411ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I411TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I411ToARGBRow = I411ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I411ToARGBRow = I411ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I411TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I411ToARGBRow = I411ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I411ToARGBRow = I411ToARGBRow_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - // Convert I420 with Alpha to preattenuated ARGB. 
static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, diff --git a/source/convert_from.cc b/source/convert_from.cc index 1256ca99c..89d24f474 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -104,28 +104,6 @@ int I420ToI444(const uint8* src_y, int src_stride_y, dst_uv_width, dst_uv_height); } -// 420 chroma is 1/2 width, 1/2 height -// 411 chroma is 1/4 width, 1x height -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int dst_uv_width = (Abs(width) + 3) >> 2; - const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); -} - // Copy to I400. 
Source can be I420,422,444,400,NV12,NV21 LIBYUV_API int I400Copy(const uint8* src_y, int src_stride_y, @@ -900,7 +878,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, - *(uint32*)(dither4x4 + ((y & 3) << 2)), width); + *(uint32*)(dither4x4 + ((y & 3) << 2)), width); // NOLINT dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1113,20 +1091,6 @@ int ConvertFromI420(const uint8* y, int y_stride, width, height); break; } - case FOURCC_I411: { - int quarterwidth = (width + 3) / 4; - uint8* dst_u = dst_sample + width * height; - uint8* dst_v = dst_u + quarterwidth * height; - r = I420ToI411(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, quarterwidth, - dst_v, quarterwidth, - width, height); - break; - } - // Formats not supported - MJPG, biplanar, some rgb formats. default: return -1; // unknown fourcc - return failure code. 
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 50ede22a6..73c180d40 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -181,79 +181,6 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, return 0; } -// ARGB little endian (bgra in memory) to I411 -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - int y; - void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV411Row_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = - ARGBToYRow_C; - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u * 4 == width && - dst_stride_v * 4 == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUV411ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV411Row = ARGBToUV411Row_Any_NEON; - if (IS_ALIGNED(width, 32)) { - ARGBToUV411Row = ARGBToUV411Row_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - 
ARGBToUV411Row(src_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - LIBYUV_API int ARGBToNV12(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, @@ -912,7 +839,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, #endif for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, - *(uint32*)(dither4x4 + ((y & 3) << 2)), width); + *(uint32*)(dither4x4 + ((y & 3) << 2)), width); /* NOLINT */ src_argb += src_stride_argb; dst_rgb565 += dst_stride_rgb565; } diff --git a/source/convert_jpeg.cc b/source/convert_jpeg.cc index 90f550a26..970735c20 100644 --- a/source/convert_jpeg.cc +++ b/source/convert_jpeg.cc @@ -86,24 +86,6 @@ static void JpegI444ToI420(void* opaque, dest->h -= rows; } -static void JpegI411ToI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I411ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u += ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - static void JpegI400ToI420(void* opaque, const uint8* const* data, const int* strides, @@ -193,17 +175,6 @@ int MJPGToI420(const uint8* sample, mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh); - // YUV411 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 4 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 
&& - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh); // YUV400 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale && @@ -213,7 +184,7 @@ int MJPGToI420(const uint8* sample, ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh); } else { // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. 411 is supported by libjpeg + // factors that occur in practice. // ERROR: Unable to convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; @@ -272,20 +243,6 @@ static void JpegI444ToARGB(void* opaque, dest->h -= rows; } -static void JpegI411ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I411ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb += rows * dest->argb_stride; - dest->h -= rows; -} - static void JpegI400ToARGB(void* opaque, const uint8* const* data, const int* strides, @@ -355,17 +312,6 @@ int MJPGToARGB(const uint8* sample, mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh); - // YUV411 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 4 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh); // YUV400 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale && @@ -375,7 
+321,7 @@ int MJPGToARGB(const uint8* sample, ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh); } else { // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. 411 is supported by libjpeg + // factors that occur in practice. // ERROR: Unable to convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; diff --git a/source/convert_to_argb.cc b/source/convert_to_argb.cc index aecdc80fd..8e931a75f 100644 --- a/source/convert_to_argb.cc +++ b/source/convert_to_argb.cc @@ -67,7 +67,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, if (need_buf) { int argb_size = crop_width * 4 * abs_crop_height; - rotate_buffer = (uint8*)malloc(argb_size); + rotate_buffer = (uint8*)malloc(argb_size); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. } @@ -262,20 +262,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, crop_width, inv_crop_height); break; } - case FOURCC_I411: { - int quarterwidth = (src_width + 3) / 4; - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u = sample + src_width * abs_src_height + - quarterwidth * crop_y + crop_x / 4; - const uint8* src_v = sample + src_width * abs_src_height + - quarterwidth * (abs_src_height + crop_y) + crop_x / 4; - r = I411ToARGB(src_y, src_width, - src_u, quarterwidth, - src_v, quarterwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - } #ifdef HAVE_JPEG case FOURCC_MJPG: r = MJPGToARGB(sample, sample_size, diff --git a/source/convert_to_i420.cc b/source/convert_to_i420.cc index e5f307c44..4c4634a71 100644 --- a/source/convert_to_i420.cc +++ b/source/convert_to_i420.cc @@ -70,7 +70,7 @@ int ConvertToI420(const uint8* sample, if (need_buf) { int y_size = crop_width * abs_crop_height; int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); - rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); + rotate_buffer = 
(uint8*)malloc(y_size + uv_size * 2); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. } @@ -286,22 +286,6 @@ int ConvertToI420(const uint8* sample, crop_width, inv_crop_height); break; } - case FOURCC_I411: { - int quarterwidth = (src_width + 3) / 4; - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u = sample + src_width * abs_src_height + - quarterwidth * crop_y + crop_x / 4; - const uint8* src_v = sample + src_width * abs_src_height + - quarterwidth * (abs_src_height + crop_y) + crop_x / 4; - r = I411ToI420(src_y, src_width, - src_u, quarterwidth, - src_v, quarterwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - } #ifdef HAVE_JPEG case FOURCC_MJPG: r = MJPGToI420(sample, sample_size, diff --git a/source/row_any.cc b/source/row_any.cc index ae84a82f8..a3520b4ec 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -127,9 +127,6 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) #ifdef HAS_I422TOARGBROW_SSSE3 ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) #endif -#ifdef HAS_I411TOARGBROW_SSSE3 -ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) -#endif #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) @@ -150,9 +147,6 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #ifdef HAS_I444TOARGBROW_AVX2 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif -#ifdef HAS_I411TOARGBROW_AVX2 -ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) -#endif #ifdef HAS_I422TOARGB4444ROW_AVX2 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) #endif @@ -165,7 +159,6 @@ ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) #ifdef HAS_I422TOARGBROW_NEON ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_NEON, 
I422ToARGBRow_NEON, 1, 0, 4, 7) -ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) @@ -720,21 +713,6 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) ANY_SIMD(src_ptr, dst_u, dst_v, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - /* repeat last 4 - 12 bytes for 411 subsampler */ \ - if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - memcpy(temp + SS(r, UVSHIFT) * BPP + BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2); \ - } \ - if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2); \ - } \ - if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - } \ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ @@ -765,7 +743,6 @@ ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOUV422ROW_NEON ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) -ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) #endif diff --git a/source/row_common.cc b/source/row_common.cc index e194e6cd1..c97b2c699 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -636,41 +636,6 @@ void ARGBToUV444Row_C(const uint8* src_argb, } } -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width 
- 3; x += 4) { - uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; - uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; - uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - src_argb += 16; - dst_u += 1; - dst_v += 1; - } - // Odd width handling mimics 'any' function which replicates last pixel. - if ((width & 3) == 3) { - uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[8]) >> 2; - uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[9]) >> 2; - uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[10]) >> 2; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } else if ((width & 3) == 2) { - uint8 ab = (src_argb[0] + src_argb[4]) >> 1; - uint8 ag = (src_argb[1] + src_argb[5]) >> 1; - uint8 ar = (src_argb[2] + src_argb[6]) >> 1; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } else if ((width & 3) == 1) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } -} - void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { @@ -1530,48 +1495,6 @@ void I422ToRGB565Row_C(const uint8* src_y, } } -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - for (x = 0; x < width - 3; x += 4) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = 255; - YuvPixel(src_y[2], src_u[0], src_v[0], - rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants); - rgb_buf[11] = 255; - YuvPixel(src_y[3], src_u[0], src_v[0], - rgb_buf + 12, rgb_buf + 13, 
rgb_buf + 14, yuvconstants); - rgb_buf[15] = 255; - src_y += 4; - src_u += 1; - src_v += 1; - rgb_buf += 16; // Advance 4 pixels. - } - if (width & 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = 255; - src_y += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = 255; - } -} - void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* rgb_buf, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 6522ac56c..bf9ddde42 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1526,26 +1526,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" -// Read 2 UV from 411, upsample to 8 UV. -// reading 4 bytes is an msan violation. 
-// "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" -// MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) -// pinsrw fails with drmemory -// __asm pinsrw xmm0, [esi], 0 /* U */ -// __asm pinsrw xmm1, [esi + edi], 0 /* V */ -#define READYUV411_TEMP \ - "movzwl " MEMACCESS([u_buf]) ",%[temp] \n" \ - "movd %[temp],%%xmm0 \n" \ - MEMOPARG(movzwl, 0x00, [u_buf], [v_buf], 1, [temp]) " \n" \ - "movd %[temp],%%xmm1 \n" \ - "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "punpckldq %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" - // Read 4 UV from NV12, upsample to 8 UV #define READNV12 \ "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ @@ -1804,42 +1784,6 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, } #endif // HAS_I422ALPHATOARGBROW_SSSE3 -#ifdef HAS_I411TOARGBROW_SSSE3 -void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int temp; - asm volatile ( - YUVTORGB_SETUP(yuvconstants) - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READYUV411_TEMP - YUVTORGB(yuvconstants) - STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [temp]"=&r"(temp), // %[temp] -#if defined(__i386__) && defined(__pic__) - [width]"+m"(width) // %[width] -#else - [width]"+rm"(width) // %[width] -#endif - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} -#endif - void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, @@ -2013,20 +1957,6 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, "vpermq 
$0xd8,%%ymm5,%%ymm5 \n" \ "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" -// Read 4 UV from 411, upsample to 16 UV. -#define READYUV411_AVX2 \ - "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" - // Read 8 UV from NV12, upsample to 16 UV. #define READNV12_AVX2 \ "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ @@ -2163,39 +2093,6 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I444TOARGBROW_AVX2 -#ifdef HAS_I411TOARGBROW_AVX2 -// 16 pixels -// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP_AVX2(yuvconstants) - "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - READYUV411_AVX2 - YUVTORGB_AVX2(yuvconstants) - STOREARGB_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} -#endif // HAS_I411TOARGBROW_AVX2 - #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
diff --git a/source/row_neon.cc b/source/row_neon.cc index 4a41d8a22..dc081fa30 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -30,17 +30,6 @@ extern "C" { MEMACCESS(2) \ "vld1.32 {d2[1]}, [%2]! \n" -// Read 8 Y, 2 U and 2 V from 422 -#define READYUV411 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - MEMACCESS(1) \ - "vld1.16 {d2[0]}, [%1]! \n" \ - MEMACCESS(2) \ - "vld1.16 {d2[1]}, [%2]! \n" \ - "vmov.u8 d3, d2 \n" \ - "vzip.u8 d2, d3 \n" - // Read 8 Y, 8 U and 8 V from 444 #define READYUV444 \ MEMACCESS(0) \ @@ -229,36 +218,6 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, ); } -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READYUV411 - YUVTORGB - "subs %4, %4, #8 \n" - MEMACCESS(3) - "vst4.8 {d20, d21, d22, d23}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1383,69 +1342,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ); } -// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. 
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(0) - "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels. - MEMACCESS(0) - "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels. - "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts. - - "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts. - "vpadd.u16 d1, d8, d9 \n" // B - "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts. - "vpadd.u16 d3, d10, d11 \n" // G - "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts. - "vpadd.u16 d5, d12, d13 \n" // R - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %3, %3, #32 \n" // 32 processed per loop. - "vmul.s16 q8, q0, q10 \n" // B - "vmls.s16 q8, q1, q11 \n" // G - "vmls.s16 q8, q2, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q2, q10 \n" // R - "vmls.s16 q9, q1, q14 \n" // G - "vmls.s16 q9, q0, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - MEMACCESS(2) - "vst1.8 {d1}, [%2]! 
\n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. #define RGBTOUV(QB, QG, QR) \ "vmul.s16 q8, " #QB ", q10 \n" /* B */ \ diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 6375d4f55..d62762dab 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -27,16 +27,6 @@ extern "C" { MEMACCESS(2) \ "ld1 {v1.s}[1], [%2], #4 \n" -// Read 8 Y, 2 U and 2 V from 422 -#define READYUV411 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - MEMACCESS(1) \ - "ld1 {v2.h}[0], [%1], #2 \n" \ - MEMACCESS(2) \ - "ld1 {v2.h}[1], [%2], #2 \n" \ - "zip1 v1.8b, v2.8b, v2.8b \n" - // Read 8 Y, 8 U and 8 V from 444 #define READYUV444 \ MEMACCESS(0) \ @@ -220,36 +210,6 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, ); } -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "movi v23.8b, #255 \n" /* A */ - "1: \n" - READYUV411 - YUVTORGB(v22, v21, v20) - "subs %w4, %w4, #8 \n" - MEMACCESS(3) - "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", - "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" - ); -} - void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1395,57 +1355,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* 
dst_v, "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ -// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - MEMACCESS(0) - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(0) - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16. - "uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts. - - "addp v0.8h, v0.8h, v4.8h \n" // B 16 shorts -> 8 shorts. - "addp v1.8h, v1.8h, v5.8h \n" // G 16 shorts -> 8 shorts. - "addp v2.8h, v2.8h, v6.8h \n" // R 16 shorts -> 8 shorts. - - "urshr v0.8h, v0.8h, #1 \n" // 2x average - "urshr v1.8h, v1.8h, #1 \n" - "urshr v2.8h, v2.8h, #1 \n" - - "subs %w3, %w3, #32 \n" // 32 processed per loop. - "mul v3.8h, v0.8h, v20.8h \n" // B - "mls v3.8h, v1.8h, v21.8h \n" // G - "mls v3.8h, v2.8h, v22.8h \n" // R - "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned - "mul v4.8h, v2.8h, v20.8h \n" // R - "mls v4.8h, v1.8h, v24.8h \n" // G - "mls v4.8h, v0.8h, v23.8h \n" // B - "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned - "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. - MEMACCESS(2) - "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); -} - // 16x2 pixels -> 8x1. 
width is number of argb pixels. e.g. 16. #define RGBTOUV(QB, QG, QR) \ "mul v3.8h, " #QB ",v20.8h \n" /* B */ \ diff --git a/source/row_win.cc b/source/row_win.cc index d2da0e439..e3c16e2a5 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -1969,21 +1969,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, __asm lea ebp, [ebp + 16] \ } -// Read 4 UV from 411, upsample to 16 UV. -#define READYUV411_AVX2 __asm { \ - __asm vmovd xmm0, dword ptr [esi] /* U */ \ - __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ - __asm lea esi, [esi + 4] \ - __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ - __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ - __asm vpermq ymm0, ymm0, 0xd8 \ - __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ - __asm vmovdqu xmm4, [eax] /* Y */ \ - __asm vpermq ymm4, ymm4, 0xd8 \ - __asm vpunpcklbw ymm4, ymm4, ymm4 \ - __asm lea eax, [eax + 16] \ - } - // Read 8 UV from NV12, upsample to 16 UV. #define READNV12_AVX2 __asm { \ __asm vmovdqu xmm0, [esi] /* UV */ \ @@ -2198,46 +2183,6 @@ void I444ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I444TOARGBROW_AVX2 -#ifdef HAS_I411TOARGBROW_AVX2 -// 16 pixels -// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
-__declspec(naked) -void I411ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // abgr - mov ebx, [esp + 12 + 20] // yuvconstants - mov ecx, [esp + 12 + 24] // width - sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha - - convertloop: - READYUV411_AVX2 - YUVTORGB_AVX2(ebx) - STOREARGB_AVX2 - - sub ecx, 16 - jg convertloop - - pop ebx - pop edi - pop esi - vzeroupper - ret - } -} -#endif // HAS_I411TOARGBROW_AVX2 - #ifdef HAS_NV12TOARGBROW_AVX2 // 16 pixels. // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). @@ -2451,24 +2396,6 @@ void I422ToRGBARow_AVX2(const uint8* y_buf, __asm lea ebp, [ebp + 8] \ } -// Read 2 UV from 411, upsample to 8 UV. -// drmemory fails with memory fault if pinsrw used. libyuv bug: 525 -// __asm pinsrw xmm0, [esi], 0 /* U */ -// __asm pinsrw xmm1, [esi + edi], 0 /* V */ -#define READYUV411_EBX __asm { \ - __asm movzx ebx, word ptr [esi] /* U */ \ - __asm movd xmm0, ebx \ - __asm movzx ebx, word ptr [esi + edi] /* V */ \ - __asm movd xmm1, ebx \ - __asm lea esi, [esi + 2] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ - __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ - __asm movq xmm4, qword ptr [eax] \ - __asm punpcklbw xmm4, xmm4 \ - __asm lea eax, [eax + 8] \ - } - // Read 4 UV from NV12, upsample to 8 UV. #define READNV12 __asm { \ __asm movq xmm0, qword ptr [esi] /* UV */ \ @@ -2819,46 +2746,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, } } -// 8 pixels. -// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -// Similar to I420 but duplicate UV once more. 
-__declspec(naked) -void I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - push ebp - mov eax, [esp + 16 + 4] // Y - mov esi, [esp + 16 + 8] // U - mov edi, [esp + 16 + 12] // V - mov edx, [esp + 16 + 16] // abgr - mov ebp, [esp + 16 + 20] // yuvconstants - mov ecx, [esp + 16 + 24] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - - convertloop: - READYUV411_EBX - YUVTORGB(ebp) - STOREARGB - - sub ecx, 8 - jg convertloop - - pop ebp - pop ebx - pop edi - pop esi - ret - } -} - // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 92a91fad4..24a18249b 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -166,10 +166,8 @@ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ TESTPLANARTOP(I420, 2, 2, I420, 2, 2) TESTPLANARTOP(I422, 2, 1, I420, 2, 2) TESTPLANARTOP(I444, 1, 1, I420, 2, 2) -TESTPLANARTOP(I411, 4, 1, I420, 2, 2) TESTPLANARTOP(I420, 2, 2, I422, 2, 1) TESTPLANARTOP(I420, 2, 2, I444, 1, 1) -TESTPLANARTOP(I420, 2, 2, I411, 4, 1) TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) TESTPLANARTOP(I422, 2, 1, I422, 2, 1) TESTPLANARTOP(I444, 1, 1, I444, 1, 1) @@ -655,7 +653,6 @@ TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1, 2, ARGB, 4) @@ -819,6 +816,8 @@ TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 
9) +#ifdef DO_THREE_PLANES +// Do 3 allocations for yuv. conventional but slower. #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ W1280, DIFF, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ @@ -894,10 +893,9 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ free_aligned_buffer_page_end(dst_v_opt); \ free_aligned_buffer_page_end(src_argb); \ } - - -#define TESTATOPLANARI2(FMT_A, BPP_A, YALIGN, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, W1280, DIFF, N, NEG, OFF) \ +#else +#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, W1280, DIFF, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ @@ -952,6 +950,7 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ free_aligned_buffer_page_end(dst_uv_opt); \ free_aligned_buffer_page_end(src_argb); \ } +#endif #define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ DIFF) \ @@ -962,9 +961,7 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, DIFF, _Invert, -, 0) \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Opt, +, 0) \ - TESTATOPLANARI2(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _SxS, +, 0) + benchmark_width_, DIFF, _Opt, +, 0) TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) #if defined(__arm__) || defined (__aarch64__) @@ -985,7 +982,6 @@ TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) // TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. 
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) -TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4) TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) @@ -1983,7 +1979,6 @@ TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) -TESTPLANARTOE(I411, 4, 1, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) diff --git a/unit_test/video_common_test.cc b/unit_test/video_common_test.cc index ac97d0f34..657f6a3d6 100644 --- a/unit_test/video_common_test.cc +++ b/unit_test/video_common_test.cc @@ -66,7 +66,6 @@ TEST_F(LibYUVBaseTest, TestFourCC) { EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); EXPECT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422)); EXPECT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444)); - EXPECT_TRUE(TestValidFourCC(FOURCC_I411, FOURCC_BPP_I411)); EXPECT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400)); EXPECT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21)); EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12));