From ebb27d69160b3768187bb8372c01dcbf5e61785a Mon Sep 17 00:00:00 2001 From: Yuan Tong Date: Wed, 9 Mar 2022 18:19:48 +0800 Subject: [PATCH] Add YUV to RGB conversion function with filter parameter Add the following functions: I420ToARGBMatrixFilter I422ToARGBMatrixFilter I010ToAR30MatrixFilter I210ToAR30MatrixFilter I010ToARGBMatrixFilter I210ToARGBMatrixFilter I420AlphaToARGBMatrixFilter I422AlphaToARGBMatrixFilter I010AlphaToARGBMatrixFilter I210AlphaToARGBMatrixFilter Bug: libyuv:872 Change-Id: Ib33b09fd7d304688c5e06c55e0a576a964665a51 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3430334 Reviewed-by: Frank Barchard Commit-Queue: Frank Barchard --- include/libyuv/convert_argb.h | 215 ++++ source/convert_argb.cc | 1971 +++++++++++++++++++++++++++++++++ source/scale_any.cc | 2 +- unit_test/convert_test.cc | 60 + 4 files changed, 2247 insertions(+), 1 deletion(-) diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index a655e5b90..4f5f2ef88 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -14,6 +14,7 @@ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" // For enum RotationMode. +#include "libyuv/scale.h" // For enum FilterMode. #ifdef __cplusplus namespace libyuv { @@ -1930,6 +1931,220 @@ int I400ToARGBMatrix(const uint8_t* src_y, int width, int height); +// Convert I420 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I420ToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert I422 to ARGB with matrix and UV filter mode. 
+LIBYUV_API +int I422ToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert I010 to AR30 with matrix and UV filter mode. +LIBYUV_API +int I010ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert I210 to AR30 with matrix and UV filter mode. +LIBYUV_API +int I210ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert I010 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I010ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert I210 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I210ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert I420 with Alpha to attenuated ARGB with matrix and UV filter mode. 
+LIBYUV_API +int I420AlphaToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter); + +// Convert I422 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I422AlphaToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter); + +// Convert I010 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter); + +// Convert I210 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter); + +// Convert P010 to ARGB with matrix and UV filter mode. 
+LIBYUV_API +int P010ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert P210 to ARGB with matrix and UV filter mode. +LIBYUV_API +int P210ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert P010 to AR30 with matrix and UV filter mode. +LIBYUV_API +int P010ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + +// Convert P210 to AR30 with matrix and UV filter mode. +LIBYUV_API +int P210ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter); + // Convert camera sample to ARGB with cropping, rotation and vertical flip. // "sample_size" is needed to parse MJPG. // "dst_stride_argb" number of bytes in a row of the dst_argb plane. diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 90f6c9472..93b2f7831 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -16,6 +16,7 @@ #include "libyuv/planar_functions.h" // For CopyPlane and ARGBShuffle. 
#include "libyuv/rotate_argb.h" #include "libyuv/row.h" +#include "libyuv/scale_row.h" // For ScaleRowUp2_Linear and ScaleRowUp2_Bilinear #include "libyuv/video_common.h" #ifdef __cplusplus @@ -5378,6 +5379,1976 @@ int H420ToAB30(const uint8_t* src_y, &kYvuH709Constants, width, height); } +static int I420ToARGBMatrixBilinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToARGBRow_C; + void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I444TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToARGBRow = I444ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToARGBRow = I444ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444ToARGBRow = I444ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToARGBRow = I444ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToARGBRow = I444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444ToARGBRow = I444ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I444ToARGBRow = I444ToARGBRow_LASX; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 4); + uint8_t* temp_u_1 = row; + uint8_t* temp_u_2 = row + kRowSize; + uint8_t* temp_v_1 = row + kRowSize * 2; + uint8_t* temp_v_2 = row + kRowSize * 3; + + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + 
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + I444ToARGBRow(src_y, temp_u_2, temp_v_2, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I422ToARGBMatrixLinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToARGBRow_C; + void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = + ScaleRowUp2_Linear_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I444TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToARGBRow = I444ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToARGBRow = I444ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444ToARGBRow = I444ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToARGBRow = I444ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToARGBRow = I444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444ToARGBRow = I444ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I444ToARGBRow = I444ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 2); + uint8_t* temp_u = row; + uint8_t* temp_v = row + kRowSize; + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_u, temp_u, width); + ScaleRowUp(src_v, temp_v, width); + I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); + 
dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I010ToAR30MatrixBilinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I410ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I410ToAR30Row_C; + void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_16_Any_C; + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } +#if defined(HAS_I410TOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I410ToAR30Row = I410ToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I410ToAR30Row = I410ToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_I410TOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I410ToAR30Row = I410ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I410ToAR30Row = I410ToAR30Row_AVX2; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int kRowSize = (width + 31) & ~31; + 
align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + uint16_t* temp_u_1 = (uint16_t*)(row); + uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + I410ToAR30Row(src_y, temp_u_2, temp_v_2, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); + } + + free_aligned_buffer_64(row); + + return 0; +} + +static int I210ToAR30MatrixLinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I410ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I410ToAR30Row_C; + void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } +#if defined(HAS_I410TOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I410ToAR30Row = I410ToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I410ToAR30Row = I410ToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_I410TOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I410ToAR30Row = I410ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I410ToAR30Row = I410ToAR30Row_AVX2; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + uint16_t* temp_u = (uint16_t*)(row); + uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_u, temp_u, width); + ScaleRowUp(src_v, temp_v, width); + I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + free_aligned_buffer_64(row); + return 0; +} + +static int I010ToARGBMatrixBilinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I410ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I410ToARGBRow_C; + void 
(*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_16_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I410TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I410ToARGBRow = I410ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I410ToARGBRow = I410ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I410TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I410ToARGBRow = I410ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I410ToARGBRow = I410ToARGBRow_AVX2; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + uint16_t* temp_u_1 = (uint16_t*)(row); + uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + 
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + I410ToARGBRow(src_y, temp_u_2, temp_v_2, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I210ToARGBMatrixLinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I410ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I410ToARGBRow_C; + void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I410TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I410ToARGBRow = I410ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I410ToARGBRow = I410ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I410TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I410ToARGBRow = I410ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I410ToARGBRow = I410ToARGBRow_AVX2; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + uint16_t* temp_u = (uint16_t*)(row); + uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_u, temp_u, width); + ScaleRowUp(src_v, temp_v, width); + I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I420AlphaToARGBMatrixBilinear( + const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate) { + int y; + void (*I444AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, const uint8_t* a_buf, + uint8_t* 
dst_argb, + const struct YuvConstants* yuvconstants, + int width) = I444AlphaToARGBRow_C; + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBAttenuateRow_C; + void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I444ALPHATOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_LASX; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_AVX2) 
+ if (TestCpuFlag(kCpuHasAVX2)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_NEON; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_MSA; + } + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 4); + uint8_t* temp_u_1 = row; + uint8_t* temp_u_2 = row + kRowSize; + uint8_t* temp_v_1 = row + kRowSize * 2; + uint8_t* temp_v_2 = row + kRowSize * 3; + + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_a += src_stride_a; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, + width); + 
if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_a += src_stride_a; + I444AlphaToARGBRow(src_y, temp_u_2, temp_v_2, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + src_a += src_stride_a; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate) { + int y; + void (*I444AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) = I444AlphaToARGBRow_C; + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBAttenuateRow_C; + void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = + ScaleRowUp2_Linear_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I444ALPHATOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444ALPHATOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 16)) { + I444AlphaToARGBRow = I444AlphaToARGBRow_LASX; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_NEON; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBAttenuateRow = 
ARGBAttenuateRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_MSA; + } + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 2); + uint8_t* temp_u = row; + uint8_t* temp_v = row + kRowSize; + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_u, temp_u, width); + ScaleRowUp(src_v, temp_v, width); + I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_a += src_stride_a; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I010AlphaToARGBMatrixBilinear( + const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate) { + int y; + void (*I410AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) = I410AlphaToARGBRow_C; + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBAttenuateRow_C; + void (*Scale2RowUp)(const uint16_t* src_ptr, 
ptrdiff_t src_stride, + uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleRowUp2_Bilinear_16_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I410ALPHATOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I410ALPHATOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_NEON; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_MSA; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + } +#endif + +#if 
defined(HAS_SCALEROWUP2LINEAR_12_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + } +#endif + + // alloc 4 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); + uint16_t* temp_u_1 = (uint16_t*)(row); + uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; + uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; + uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; + + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_a += src_stride_a; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); + I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_a += src_stride_a; + I410AlphaToARGBRow(src_y, temp_u_2, temp_v_2, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_a += src_stride_a; + src_u += src_stride_u; + src_v += src_stride_v; + } + + if (!(height & 1)) { + Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); + Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); + I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + } + + free_aligned_buffer_64(row); + return 0; +} + +static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, 
+ int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate) { + int y; + void (*I410AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) = I410AlphaToARGBRow_C; + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBAttenuateRow_C; + void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I410ALPHATOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I410ALPHATOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I410AlphaToARGBRow = I410AlphaToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_NEON; + } + } +#endif +#if defined(HAS_ARGBATTENUATEROW_MSA) + if 
(TestCpuFlag(kCpuHasMSA)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_MSA; + } + } +#endif + +#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (width + 31) & ~31; + align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + uint16_t* temp_u = (uint16_t*)(row); + uint16_t* temp_v = (uint16_t*)(row) + kRowSize; + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_u, temp_u, width); + ScaleRowUp(src_v, temp_v, width); + I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, + width); + if (attenuate) { + ARGBAttenuateRow(dst_argb, dst_argb, width); + } + dst_argb += dst_stride_argb; + src_a += src_stride_a; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + free_aligned_buffer_64(row); + return 0; +} + +static int P010ToARGBMatrixBilinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*P410ToARGBRow)( + const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; + void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleUVRowUp2_Bilinear_16_Any_C; + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_P410TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + P410ToARGBRow = P410ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + P410ToARGBRow = P410ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_P410TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + P410ToARGBRow = P410ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + P410ToARGBRow = P410ToARGBRow_AVX2; + } + } +#endif + +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (2 * width + 31) & ~31; + align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + uint16_t* temp_uv_1 = (uint16_t*)(row); + uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + + Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + P410ToARGBRow(src_y, temp_uv_2, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_uv += src_stride_uv; + } + + if (!(height & 1)) { + Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); + } + + free_aligned_buffer_64(row); + return 0; +} + +static int P210ToARGBMatrixLinear(const uint16_t* 
src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*P410ToARGBRow)( + const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; + void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = + ScaleUVRowUp2_Linear_16_Any_C; + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_P410TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + P410ToARGBRow = P410ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + P410ToARGBRow = P410ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_P410TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + P410ToARGBRow = P410ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + P410ToARGBRow = P410ToARGBRow_AVX2; + } + } +#endif + +#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + } +#endif + + const int kRowSize = (2 * width + 31) & ~31; + align_buffer_64(row, kRowSize * sizeof(uint16_t)); + uint16_t* temp_uv = (uint16_t*)(row); + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_uv, temp_uv, width); + P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_uv += src_stride_uv; + } + + free_aligned_buffer_64(row); + return 0; +} + +static int 
P010ToAR30MatrixBilinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*P410ToAR30Row)( + const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; + void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = + ScaleUVRowUp2_Bilinear_16_Any_C; + if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } +#if defined(HAS_P410TOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + P410ToAR30Row = P410ToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + P410ToAR30Row = P410ToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_P410TOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + P410ToAR30Row = P410ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + P410ToAR30Row = P410ToAR30Row_AVX2; + } + } +#endif + +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON + if (TestCpuFlag(kCpuHasNEON)) { + Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + } +#endif + + // alloc 2 lines temp + const int kRowSize = (2 * width + 31) & ~31; + align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); + uint16_t* temp_uv_1 = (uint16_t*)(row); + uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; + + Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + P410ToAR30Row(src_y, temp_uv_1, dst_ar30, 
yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + + for (y = 0; y < height - 2; y += 2) { + Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); + P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + P410ToAR30Row(src_y, temp_uv_2, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + src_uv += src_stride_uv; + } + + if (!(height & 1)) { + Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); + P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); + } + + free_aligned_buffer_64(row); + return 0; +} + +static int P210ToAR30MatrixLinear(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*P410ToAR30Row)( + const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; + void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = + ScaleUVRowUp2_Linear_16_Any_C; + if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } +#if defined(HAS_P410TOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + P410ToAR30Row = P410ToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + P410ToAR30Row = P410ToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_P410TOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + P410ToAR30Row = P410ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + P410ToAR30Row = P410ToAR30Row_AVX2; + } + } +#endif + +#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2 + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + } +#endif + +#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + } +#endif + + const int kRowSize = (2 * width + 31) & ~31; + align_buffer_64(row, kRowSize * sizeof(uint16_t)); + uint16_t* temp_uv = (uint16_t*)(row); + + for (y = 0; y < height; ++y) { + ScaleRowUp(src_uv, temp_uv, width); + P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + src_uv += src_stride_uv; + } + + free_aligned_buffer_64(row); + return 0; +} + +LIBYUV_API +int I420ToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + return I420ToARGBMatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, 
src_stride_v, + dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: + // Actually we can do this, but probably there's no usage. + return -1; + } + + return -1; +} + +LIBYUV_API +int I422ToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I422ToARGBMatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_argb, dst_stride_argb, yuvconstants, width, height); + } + + return -1; +} + +LIBYUV_API +int I010ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + return I010ToAR30MatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: + return -1; + } + + return -1; +} + +LIBYUV_API +int I210ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode 
filter) { + switch (filter) { + case kFilterNone: + return I210ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I210ToAR30MatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_ar30, dst_stride_ar30, yuvconstants, width, height); + } + + return -1; +} + +LIBYUV_API +int I010ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + return I010ToARGBMatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: + return -1; + } + + return -1; +} + +LIBYUV_API +int I210ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I210ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I210ToARGBMatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_argb, dst_stride_argb, yuvconstants, width, height); + } + + return -1; +} + +LIBYUV_API +int 
I420AlphaToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I420AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, + src_v, src_stride_v, src_a, src_stride_a, + dst_argb, dst_stride_argb, yuvconstants, + width, height, attenuate); + case kFilterBilinear: + case kFilterBox: + return I420AlphaToARGBMatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, + src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, + attenuate); + case kFilterLinear: + return -1; + } + + return -1; +} + +LIBYUV_API +int I422AlphaToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I422AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, + src_v, src_stride_v, src_a, src_stride_a, + dst_argb, dst_stride_argb, yuvconstants, + width, height, attenuate); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I422AlphaToARGBMatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, + src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, + attenuate); + } + + return -1; +} + +LIBYUV_API +int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + 
uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I010AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, + src_v, src_stride_v, src_a, src_stride_a, + dst_argb, dst_stride_argb, yuvconstants, + width, height, attenuate); + case kFilterBilinear: + case kFilterBox: + return I010AlphaToARGBMatrixBilinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, + src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, + attenuate); + case kFilterLinear: + return -1; + } + + return -1; +} + +LIBYUV_API +int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return I210AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, + src_v, src_stride_v, src_a, src_stride_a, + dst_argb, dst_stride_argb, yuvconstants, + width, height, attenuate); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I210AlphaToARGBMatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, + src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, + attenuate); + } + + return -1; +} + +LIBYUV_API +int P010ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, + dst_argb, dst_stride_argb, yuvconstants, width, 
+ height); + case kFilterBilinear: + case kFilterBox: + return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv, + src_stride_uv, dst_argb, dst_stride_argb, + yuvconstants, width, height); + case kFilterLinear: + return -1; + } + + return -1; +} + +LIBYUV_API +int P210ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return P210ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, + dst_argb, dst_stride_argb, yuvconstants, width, + height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return P210ToARGBMatrixLinear(src_y, src_stride_y, src_uv, src_stride_uv, + dst_argb, dst_stride_argb, yuvconstants, + width, height); + } + + return -1; +} + +LIBYUV_API +int P010ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv, + dst_ar30, dst_stride_ar30, yuvconstants, width, + height); + case kFilterBilinear: + case kFilterBox: + return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv, + src_stride_uv, dst_ar30, dst_stride_ar30, + yuvconstants, width, height); + case kFilterLinear: + return -1; + } + + return -1; +} + +LIBYUV_API +int P210ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + FilterMode filter) { + switch (filter) { + case kFilterNone: + return P210ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv, + dst_ar30, 
dst_stride_ar30, yuvconstants, width, + height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return P210ToAR30MatrixLinear(src_y, src_stride_y, src_uv, src_stride_uv, + dst_ar30, dst_stride_ar30, yuvconstants, + width, height); + } + + return -1; +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/scale_any.cc b/source/scale_any.cc index 63aff51fd..0f6c345d5 100644 --- a/source/scale_any.cc +++ b/source/scale_any.cc @@ -658,7 +658,7 @@ CANY(ScaleARGBFilterCols_Any_LSX, } \ C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ } \ - dst_ptr[dst_width - 1] = src_ptr[(dst_width / 2) - 1]; \ + dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ } // Even the C versions need to be wrapped, because boundary pixels have to diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 2701ac166..de90f6606 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -673,6 +673,13 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define V444ToARGB(a, b, c, d, e, f, g, h, i, j) \ I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvV2020Constants, i, j) +#define I420ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) + #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ @@ -806,6 +813,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, AB30, 4, 4, 1) TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) #endif +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) #else TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) @@ -822,12 +831,14 @@ TESTPLANARTOB(I422, 2, 1, 
RGB565, 2, 2, 1) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) +TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) +TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif @@ -990,6 +1001,13 @@ TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ l, m) +#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) + #if defined(ENABLE_FULL_TESTS) TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1) @@ -1027,10 +1045,14 @@ TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1) TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1) TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1) TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #else TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1) TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1) TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1) +TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1) +TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1) #endif #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ @@ -3355,6 +3377,19 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { #define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \ 
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) +#define I010ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I010ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I010ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ + I010ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I210ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ + I210ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) +#define I210ToAR30Filter(a, b, c, d, e, f, g, h, i, j) \ + I210ToAR30MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) + // TODO(fbarchard): Fix clamping issue affected by U channel. #define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \ BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \ @@ -3435,6 +3470,8 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1) TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGBFilter, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGBFilter, 4, 4, 1) #ifdef LITTLE_ENDIAN_ONLY_TEST TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1) @@ -3456,6 +3493,8 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1) TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) +TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1) +TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) #endif // LITTLE_ENDIAN_ONLY_TEST #endif // DISABLE_SLOW_TESTS @@ -3643,6 +3682,12 @@ TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1) #define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \ l, m) +#define I010AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + 
I010AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) +#define I210AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I210AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \ + &kYuvI601Constants, k, l, m, kFilterBilinear) // These conversions are only optimized for x86 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) @@ -3682,6 +3727,8 @@ TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10) TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10) TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10) TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10) +TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) #endif // DISABLE_SLOW_TESTS #define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ @@ -3766,6 +3813,15 @@ TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10) #define P216ToAR30(a, b, c, d, e, f, g, h) \ P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h) +#define P010ToARGBFilter(a, b, c, d, e, f, g, h) \ + P010ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, kFilterBilinear) +#define P210ToARGBFilter(a, b, c, d, e, f, g, h) \ + P210ToARGBMatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, kFilterBilinear) +#define P010ToAR30Filter(a, b, c, d, e, f, g, h) \ + P010ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, kFilterBilinear) +#define P210ToAR30Filter(a, b, c, d, e, f, g, h) \ + P210ToAR30MatrixFilter(a, b, c, d, e, f, &kYuvH709Constants, g, h, kFilterBilinear) + #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__) TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10) TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10) @@ -3773,6 +3829,8 @@ TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12) TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12) TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16) TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 
16) +TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10) +TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10) #ifdef LITTLE_ENDIAN_ONLY_TEST TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10) TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10) @@ -3780,6 +3838,8 @@ TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12) TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12) TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16) TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16) +TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10) +TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10) #endif // LITTLE_ENDIAN_ONLY_TEST #endif // DISABLE_SLOW_TESTS