diff --git a/README.chromium b/README.chromium index 681e9cc0f..8eecc5338 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1841 +Version: 1844 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 4025050a8..8e4562efc 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -404,6 +404,32 @@ int U444ToABGR(const uint8_t* src_y, int width, int height); +// Convert I444 to RGB24. +LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert I444 to RAW. +LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, @@ -1312,6 +1338,32 @@ int J420ToRAW(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + LIBYUV_API int I420ToRGB565(const uint8_t* src_y, int src_stride_y, @@ -1495,6 +1547,20 @@ int I444ToARGBMatrix(const uint8_t* src_y, int width, int height); +// Convert I444 to RGB24 with matrix. 
+LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert 10 bit 420 YUV to ARGB with matrix. LIBYUV_API int I010ToAR30Matrix(const uint16_t* src_y, @@ -1893,6 +1959,20 @@ int I420ToRGB24Matrix(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB24 with matrix. +LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert I420 to RGB565 with specified color matrix. LIBYUV_API int I420ToRGB565Matrix(const uint8_t* src_y, @@ -1975,6 +2055,21 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, int height, enum FilterMode filter); +// Convert I422 to RGB24 with matrix and UV filter mode. +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + // Convert I420 to RGB24 with matrix and UV filter mode. 
LIBYUV_API int I420ToRGB24MatrixFilter(const uint8_t* src_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 1c14ef3b4..c09d51bac 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -111,6 +111,7 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 +#define HAS_I444TORGB24ROW_SSSE3 #define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 @@ -219,6 +220,7 @@ extern "C" { #define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_I444TOARGBROW_AVX2 +#define HAS_I444TORGB24ROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 @@ -469,6 +471,7 @@ extern "C" { #define HAS_I422TOYUY2ROW_NEON #define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_I444TORGB24ROW_NEON #define HAS_INTERPOLATEROW_16_NEON #define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON @@ -903,6 +906,12 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -3183,6 +3192,12 @@ void I444ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -3377,6 +3392,18 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const 
uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void I444ToRGB24Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -3718,12 +3745,24 @@ void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4385,6 +4424,12 @@ void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index afe00d4be..699d13f91 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1841 +#define LIBYUV_VERSION 1844 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 5500fcb28..64425c596 100644 --- 
a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -10,6 +10,8 @@ #include "libyuv/convert_argb.h" +#include <assert.h> + #include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG @@ -67,6 +69,7 @@ int I420ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -300,6 +303,7 @@ int I422ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -538,6 +542,7 @@ int I444ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -749,6 +754,128 @@ int U444ToABGR(const uint8_t* src_y, width, height); } +// Convert I444 to RGB24 with matrix. +LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } + // Coalesce rows. 
+ if (src_stride_y == width && src_stride_u == width && src_stride_v == width && + dst_stride_rgb24 == width * 3) { + width *= height; + height = 1; + src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I444ToRGB24Row = I444ToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_LSX; + } + } +#endif + + for (y = 0; y < height; ++y) { + I444ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I444 to RGB24. +LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I444 to RAW. 
+LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert 10 bit YUV to ARGB with matrix. // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. @@ -769,6 +896,7 @@ int I010ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -928,6 +1056,7 @@ int I012ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -985,6 +1114,7 @@ int I210ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1139,6 +1269,7 @@ int I410ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1192,6 +1323,7 @@ int I010ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v 
|| !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1355,6 +1487,7 @@ int I012ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1410,6 +1543,7 @@ int I210ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1570,6 +1704,7 @@ int I410ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1619,6 +1754,7 @@ int P010ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1669,6 +1805,7 @@ int P210ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1717,6 +1854,7 @@ int P010ToAR30Matrix(const uint16_t* src_y, void (*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1767,6 +1905,7 @@ int P210ToAR30Matrix(const uint16_t* src_y, void 
(*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1825,6 +1964,7 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1949,6 +2089,7 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2071,6 +2212,7 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2314,6 +2456,7 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2414,6 +2557,7 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2512,6 +2656,7 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, int width) = 
I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2599,6 +2744,7 @@ int I400ToARGBMatrix(const uint8_t* src_y, void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I400ToARGBRow_C; + assert(yuvconstants); if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3516,6 +3662,7 @@ int NV12ToARGBMatrix(const uint8_t* src_y, void (*NV12ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3600,6 +3747,7 @@ int NV21ToARGBMatrix(const uint8_t* src_y, void (*NV21ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3743,6 +3891,7 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, void (*NV12ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -3803,6 +3952,7 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, void (*NV21ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4145,6 +4295,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y, const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int 
halfheight = (height + 1) >> 1; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -4245,6 +4396,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4356,6 +4508,7 @@ int NV12ToRGB565Matrix(const uint8_t* src_y, void (*NV12ToRGB565Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -4458,6 +4611,7 @@ int I420ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4574,6 +4728,7 @@ int I420ToRGB24Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4744,6 +4899,121 @@ int H420ToRAW(const uint8_t* src_y, width, height); } +// Convert I422 to RGB24 with matrix. 
+LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I422TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToRGB24Row = I422ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToRGB24Row = I422ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB24Row = I422ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_LASX; + } + } +#endif + + for (y = 0; y < height; ++y) { + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v 
+= src_stride_v; + } + return 0; +} + +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert I420 to ARGB1555. LIBYUV_API int I420ToARGB1555(const uint8_t* src_y, @@ -4924,6 +5194,7 @@ int I420ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5055,6 +5326,7 @@ int I422ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5298,6 +5570,7 @@ int I420ToAR30Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5421,9 +5694,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int 
width) = I444ToARGBRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5474,27 +5750,31 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -5506,15 +5786,15 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, 
row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5526,8 +5806,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5551,8 +5831,9 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5604,22 +5885,22 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = 
ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -5630,8 +5911,8 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5655,15 +5936,16 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, int width, int height) { int y; - void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, - const uint8_t* v_buf, uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - I444ToARGBRow_C; - void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = - ARGBToRGB24Row_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { 
return -1; } @@ -5673,141 +5955,96 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } -#if defined(HAS_I444TOARGBROW_SSSE3) +#if defined(HAS_I444TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - I444ToARGBRow = I444ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_SSSE3; + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; } } #endif -#if defined(HAS_I444TOARGBROW_AVX2) +#if defined(HAS_I444TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - I444ToARGBRow = I444ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I444ToARGBRow = I444ToARGBRow_AVX2; + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; } } #endif -#if defined(HAS_I444TOARGBROW_NEON) +#if defined(HAS_I444TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - I444ToARGBRow = I444ToARGBRow_Any_NEON; + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_NEON; + I444ToRGB24Row = I444ToRGB24Row_NEON; } } #endif -#if defined(HAS_I444TOARGBROW_MSA) +#if defined(HAS_I444TORGB24ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; + I444ToRGB24Row = I444ToRGB24Row_MSA; } } #endif -#if defined(HAS_I444TOARGBROW_LASX) +#if defined(HAS_I444TORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { - I444ToARGBRow = I444ToARGBRow_Any_LASX; + I444ToRGB24Row = I444ToRGB24Row_Any_LASX; if (IS_ALIGNED(width, 32)) { - I444ToARGBRow = I444ToARGBRow_LASX; + I444ToRGB24Row = I444ToRGB24Row_LASX; } } #endif -#if defined(HAS_ARGBTORGB24ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = 
ARGBToRGB24Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_AVX2; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) - if (TestCpuFlag(kCpuHasAVX512VBMI)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_MSA; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_LASX; - } - } -#endif -// TODO: Fix HAS macros to match function names -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if 
defined(HAS_SCALEROWUP2_LINEAR_NEON) + +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif // alloc 4 lines temp const int row_size = (width + 31) & ~31; - align_buffer_64(row, row_size * 8); + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - uint8_t* temp_argb = row + row_size * 4; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; - I444ToARGBRow(src_y, temp_u_2, temp_v_2, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + I444ToRGB24Row(src_y, temp_u_2, temp_v_2, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; src_u += src_stride_u; @@ -5815,10 +6052,9 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - 
Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); } free_aligned_buffer_64(row); @@ -5841,9 +6077,12 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5870,21 +6109,24 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if 
(TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -5896,15 +6138,15 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -5916,8 +6158,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); } @@ -5942,8 +6184,9 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); 
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5972,17 +6215,17 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -5993,8 +6236,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6021,9 +6264,12 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6050,21 +6296,24 @@ static int 
I010ToARGBMatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6076,15 +6325,15 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6096,8 +6345,8 @@ static int I010ToARGBMatrixBilinear(const 
uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -6121,8 +6370,9 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6151,17 +6401,17 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6172,8 +6422,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6208,9 +6458,12 @@ static int 
I420AlphaToARGBMatrixBilinear( int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6293,27 +6546,32 @@ static int I420AlphaToARGBMatrixBilinear( } } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -6325,8 +6583,8 @@ static int 
I420AlphaToARGBMatrixBilinear( uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6337,8 +6595,8 @@ static int I420AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6360,8 +6618,8 @@ static int I420AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6395,8 +6653,9 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6481,22 +6740,22 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if 
(TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -6507,8 +6766,8 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6548,9 +6807,12 @@ static int I010AlphaToARGBMatrixBilinear( int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6610,21 +6872,24 @@ static int I010AlphaToARGBMatrixBilinear( } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if 
defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6636,8 +6901,8 @@ static int I010AlphaToARGBMatrixBilinear( uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6648,8 +6913,8 @@ static int I010AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6671,8 +6936,8 @@ static int I010AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - 
Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6706,8 +6971,9 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6769,17 +7035,17 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6790,8 +7056,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6820,9 +7086,10 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, void 
(*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6851,19 +7118,19 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif @@ -6873,13 +7140,13 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ 
-6890,7 +7157,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); } @@ -6911,8 +7178,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, void (*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6941,19 +7209,19 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif @@ -6962,7 +7230,7 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6986,9 +7254,10 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* 
uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -7017,19 +7286,19 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif @@ -7039,13 +7308,13 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -7056,7 +7325,7 @@ static int P010ToAR30MatrixBilinear(const 
uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); } @@ -7077,8 +7346,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -7107,19 +7377,19 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif @@ -7128,7 +7398,7 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -7139,6 +7409,128 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, return 0; } +static int I422ToRGB24MatrixLinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int 
src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif + + // alloc 2 lines temp + 
const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); + uint8_t* temp_u = row; + uint8_t* temp_v = row + row_size; + + for (y = 0; y < height; ++y) { + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); + I444ToRGB24Row(src_y, temp_u, temp_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I422ToRGB24MatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + } + + return -1; +} + LIBYUV_API int I420ToARGBMatrixFilter(const uint8_t* src_y, int src_stride_y, @@ -7217,14 +7609,12 @@ int I420ToRGB24MatrixFilter(const uint8_t* src_y, return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420ToRGB24MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); - case kFilterLinear: - // TODO: Implement Linear using Bilinear with Scale2RowUp stride 0 - return -1; } return -1; @@ -7248,13 +7638,12 @@ int I010ToAR30MatrixFilter(const 
uint16_t* src_y, return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToAR30MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7307,13 +7696,12 @@ int I010ToARGBMatrixFilter(const uint16_t* src_y, return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7370,14 +7758,13 @@ int I420AlphaToARGBMatrixFilter(const uint8_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7439,14 +7826,13 @@ int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, 
dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7486,6 +7872,8 @@ int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, return -1; } +// TODO(fb): Verify this function works correctly. P010 is like NV12 but 10 bit +// UV is biplanar. LIBYUV_API int P010ToARGBMatrixFilter(const uint16_t* src_y, int src_stride_y, @@ -7502,13 +7890,12 @@ int P010ToARGBMatrixFilter(const uint16_t* src_y, return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7557,13 +7944,12 @@ int P010ToAR30MatrixFilter(const uint16_t* src_y, return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; diff --git a/source/row_any.cc b/source/row_any.cc index bd46ba1b5..d6149ee52 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -359,6 +359,9 @@ ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15) #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) #endif +#ifdef HAS_I444TORGB24ROW_SSSE3 +ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15) +#endif #ifdef HAS_I422TORGB24ROW_AVX2 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) #endif @@ -374,6 +377,9 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #ifdef 
HAS_I444TOARGBROW_AVX2 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif +#ifdef HAS_I444TORGB24ROW_AVX2 +ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31) +#endif #ifdef HAS_I422TOARGB4444ROW_AVX2 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15) #endif @@ -383,6 +389,9 @@ ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15) #ifdef HAS_I422TORGB565ROW_AVX2 ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15) #endif +#ifdef HAS_I444TORGB24ROW_NEON +ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7) +#endif #ifdef HAS_I422TOARGBROW_NEON ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) diff --git a/source/row_common.cc b/source/row_common.cc index 4e1141f70..2531c85b8 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1863,6 +1863,23 @@ void I444ToARGBRow_C(const uint8_t* src_y, } } +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width; ++x) { + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + src_y += 1; + src_u += 1; + src_v += 1; + rgb_buf += 3; // Advance 1 pixel. + } +} + // Also used for 420 void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, @@ -4061,6 +4078,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, } #endif +#if defined(HAS_I444TORGB24ROW_AVX2) +void I444ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif + src_y += twidth; + src_u += twidth; + src_v += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 3bda44821..af92e3f02 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2582,6 +2582,20 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \ "lea 0x20(%[dst_rgba]),%[dst_rgba] \n" +// Store 8 RGB24 values. +#define STORERGB24 \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklbw %%xmm2,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm1 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm1 \n" \ + "pshufb %%xmm5,%%xmm0 \n" \ + "pshufb %%xmm6,%%xmm1 \n" \ + "palignr $0xc,%%xmm0,%%xmm1 \n" \ + "movq %%xmm0,(%[dst_rgb24]) \n" \ + "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" \ + "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" + // Store 8 AR30 values. 
#define STOREAR30 \ "psraw $0x4,%%xmm0 \n" \ @@ -2691,17 +2705,43 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, "1: \n" READYUV422 YUVTORGB(yuvconstants) - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0,(%[dst_rgb24]) \n" - "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" - "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" + STORERGB24 + "subl $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] +#if defined(__i386__) + [width]"+m"(width) // %[width] +#else + [width]"+rm"(width) // %[width] +#endif + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] + [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), + [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" + ); +} + +void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" + "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" + "sub %[u_buf],%[v_buf] \n" + + LABELALIGN + "1: \n" + READYUV444 + YUVTORGB(yuvconstants) + STORERGB24 "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] diff --git a/source/row_neon.cc b/source/row_neon.cc index 3f5c5de1b..36185e8c9 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -156,6 +156,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, : "cc", "memory", YUVTORGB_REGS, "d6"); } +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct 
YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV444 YUVTORGB + RGBTORGB8 + "subs %[width], %[width], #8 \n" + "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n" + "bgt 1b \n" + : [src_y] "+r"(src_y), // %[src_y] + [src_u] "+r"(src_u), // %[src_u] + [src_v] "+r"(src_v), // %[src_v] + [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24] + [width] "+r"(width) // %[width] + : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] + [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] + : "cc", "memory", YUVTORGB_REGS); +} + void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 37962378e..3cbd9b79b 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -142,6 +142,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, : "cc", "memory", YUVTORGB_REGS, "v19"); } +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV444 YUVTORGB + RGBTORGB8 + "subs %w[width], %w[width], #8 \n" + "st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n" + "b.gt 1b \n" + : [src_y] "+r"(src_y), // %[src_y] + [src_u] "+r"(src_u), // %[src_u] + [src_v] "+r"(src_v), // %[src_v] + [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24] + [width] "+r"(width) // %[width] + : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] + [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] + : "cc", "memory", YUVTORGB_REGS); +} + void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, diff --git a/source/row_win.cc b/source/row_win.cc index c7c1ff60d..c5a14f86f 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2789,6 +2789,44 @@ __declspec(naked) void I422ToRGB24Row_SSSE3( } } +// 8 pixels. 
+// 8 UV values, mixed with 8 Y producing 8 RGB24 (24 bytes). +__declspec(naked) void I444ToRGB24Row_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + __asm { + push esi + push edi + push ebx + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U + mov edi, [esp + 12 + 12] // V + mov edx, [esp + 12 + 16] // argb + mov ebx, [esp + 12 + 20] // yuvconstants + mov ecx, [esp + 12 + 24] // width + sub edi, esi + movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 + movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 + + convertloop: + READYUV444 + YUVTORGB(ebx) + STORERGB24 + + sub ecx, 8 + jg convertloop + + pop ebx + pop edi + pop esi + ret + } +} + // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). __declspec(naked) void I422ToRGB565Row_SSSE3( diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index bb06e4edd..47eff2ece 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -683,6 +683,9 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) +#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I422ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) @@ -795,8 +798,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(J444, 1, 
1, ARGB, 4, 4, 1) TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) @@ -820,6 +827,7 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I422, 2, 1, RGB24Filter, 3, 3, 1) #else TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) @@ -844,6 +852,7 @@ TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif