diff --git a/README.chromium b/README.chromium index f71cb2481..92d44bc8c 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1936 +Version: 1937 Revision: DEPS License: BSD-3-Clause License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 662337750..4ca5ed3a4 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -875,6 +875,19 @@ int BGRAToI420(const uint8_t* src_bgra, int width, int height); +// BGRA little endian (argb in memory) to I422. +LIBYUV_API +int BGRAToI422(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + // ABGR little endian (rgba in memory) to I420. LIBYUV_API int ABGRToI420(const uint8_t* src_abgr, @@ -888,6 +901,19 @@ int ABGRToI420(const uint8_t* src_abgr, int width, int height); +// ABGR little endian (rgba in memory) to I422. +LIBYUV_API +int ABGRToI422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + // RGBA little endian (abgr in memory) to I420. LIBYUV_API int RGBAToI420(const uint8_t* src_rgba, @@ -901,6 +927,19 @@ int RGBAToI420(const uint8_t* src_rgba, int width, int height); +// RGBA little endian (abgr in memory) to I422. +LIBYUV_API +int RGBAToI422(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + // RGB little endian (bgr in memory) to I420. 
LIBYUV_API int RGB24ToI420(const uint8_t* src_rgb24, diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index e8a8d6a4d..5786c5aae 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -245,6 +245,19 @@ int ARGBToI422(const uint8_t* src_argb, int width, int height); +// Convert ABGR To I422. +LIBYUV_API +int ABGRToI422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + // RGB to I444 with matrix. See ArgbConstants at the top of this file for usage. LIBYUV_API int ARGBToI422Matrix(const uint8_t* src_argb, @@ -458,7 +471,7 @@ int ARGBToUYVY(const uint8_t* src_argb, // RAW to NV21 with Matrix LIBYUV_API -int RGBToNV21Matrix(const uint8_t* src_raw, +int RAWToNV21Matrix(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y, int dst_stride_y, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 651c641a6..f384c1efb 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1936 +#define LIBYUV_VERSION 1937 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 0b90ffaaf..079fb650c 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -2008,8 +2008,8 @@ int ARGBToI420(const uint8_t* src_argb, int width, int height) { return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kArgbI601Constants, width, height); + dst_stride_u, dst_v, dst_stride_v, &kArgbI601Constants, + width, height); } LIBYUV_API @@ -2094,10 +2094,40 @@ ARGBToUVMatrixRow_C; } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow 
= ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } @@ -2105,64 +2135,10 @@ ARGBToUVMatrixRow_C; #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; } } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - 
} -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } #endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) { @@ -2197,7 +2173,7 @@ ARGBToUVMatrixRow_C; // Convert ARGB to I420 with Alpha // The following version calls ARGBExtractAlpha on the full image. LIBYUV_API -int ARGBToI420AlphaMatrix(const uint8_t* src_argb, +int ARGBToI420Alpha(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, @@ -2208,40 +2184,19 @@ int ARGBToI420AlphaMatrix(const uint8_t* src_argb, uint8_t* dst_a, int dst_stride_a, int width, - int height, - const struct ArgbConstants* argbconstants) { - int r = ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, argbconstants, width, height); + int height) { + int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height); if (r == 0) { r = ARGBExtractAlpha(src_argb, src_stride_argb, dst_a, dst_stride_a, width, height); } return r; } - -LIBYUV_API -int ARGBToI420AlphaMatrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - uint8_t* dst_a, - int dst_stride_a, - int width, - int height, - const struct ArgbConstants* argbconstants) { - return ARGBToI420AlphaMatrix(src_argb, src_stride_argb, dst_y, dst_stride_y, - dst_u, 
dst_stride_u, dst_v, dst_stride_v, - dst_a, dst_stride_a, width, height, - &kArgbI601Constants); -} #else // USE_EXTRACTALPHA // Convert ARGB to I420 with Alpha LIBYUV_API -int ARGBToI420AlphaMatrix(const uint8_t* src_argb, +int ARGBToI420Alpha(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, @@ -2252,160 +2207,139 @@ int ARGBToI420AlphaMatrix(const uint8_t* src_argb, uint8_t* dst_a, int dst_stride_a, int width, - int height, - const struct ArgbConstants* argbconstants) { + int height) { int y; - void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYRow_C; void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, int width) = ARGBExtractAlphaRow_C; if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 || height == 0) { return -1; } - -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if 
(TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if 
(IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } -#endif // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } +#if defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYRow = ARGBToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + ARGBToUVRow = ARGBToUVRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + ARGBToUVRow = ARGBToUVRow_SVE2; + } + } 
+#endif +#if defined(HAS_ARGBTOUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + ARGBToUVRow = ARGBToUVRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + ARGBToUVRow = ARGBToUVRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYRow = ARGBToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYRow = ARGBToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVRow = ARGBToUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? 
ARGBExtractAlphaRow_SSE2 @@ -2437,10 +2371,9 @@ int ARGBToI420AlphaMatrix(const uint8_t* src_argb, #endif for (y = 0; y < height - 1; y += 2) { - ARGBToUVMatrixRow(src_argb, src_stride_argb, dst_u, dst_v, width, argbconstants); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, - argbconstants); + ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); ARGBExtractAlphaRow(src_argb, dst_a, width); ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a, width); @@ -2451,31 +2384,12 @@ int ARGBToI420AlphaMatrix(const uint8_t* src_argb, dst_a += dst_stride_a * 2; } if (height & 1) { - ARGBToUVMatrixRow(src_argb, 0, dst_u, dst_v, width, argbconstants); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); ARGBExtractAlphaRow(src_argb, dst_a, width); } return 0; } - -LIBYUV_API -int ARGBToI420Alpha(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - uint8_t* dst_a, - int dst_stride_a, - int width, - int height) { - return ARGBToI420AlphaMatrix(src_argb, src_stride_argb, dst_y, dst_stride_y, - dst_u, dst_stride_u, dst_v, dst_stride_v, - dst_a, dst_stride_a, width, height, - &kArgbI601Constants); -} #endif // USE_EXTRACTALPHA // Convert BGRA to I420. 
@@ -2490,147 +2404,26 @@ int BGRAToI420(const uint8_t* src_bgra, int dst_stride_v, int width, int height) { - int y; - void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, - uint8_t* dst_u, uint8_t* dst_v, int width) = - BGRAToUVRow_C; - void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) = - BGRAToYRow_C; - if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_bgra = src_bgra + (height - 1) * src_stride_bgra; - src_stride_bgra = -src_stride_bgra; - } -#if defined(HAS_BGRATOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - BGRAToYRow = BGRAToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_NEON; - } - } -#endif -#if defined(HAS_BGRATOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - BGRAToYRow = BGRAToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_BGRATOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - BGRAToUVRow = BGRAToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_NEON; - } - } -#endif -#if defined(HAS_BGRATOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - BGRAToUVRow = BGRAToUVRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_BGRATOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - BGRAToUVRow = BGRAToUVRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - BGRAToUVRow = BGRAToUVRow_SVE2; - } - } -#endif -#if defined(HAS_BGRATOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - BGRAToUVRow = BGRAToUVRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - BGRAToUVRow = BGRAToUVRow_SME; - } - } -#endif -#if defined(HAS_BGRATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - BGRAToYRow = BGRAToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_SSSE3; - } - } -#endif -#if 
defined(HAS_BGRATOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_BGRATOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - BGRAToYRow = BGRAToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - BGRAToYRow = BGRAToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - BGRAToYRow = BGRAToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - BGRAToYRow = BGRAToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_BGRATOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - BGRAToUVRow = BGRAToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - BGRAToUVRow = BGRAToUVRow_AVX2; - } - } -#endif -#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - BGRAToYRow = BGRAToYRow_Any_LSX; - BGRAToUVRow = BGRAToUVRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_LSX; - BGRAToUVRow = BGRAToUVRow_LSX; - } - } -#endif -#if defined(HAS_BGRATOYROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - BGRAToYRow = BGRAToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - BGRAToYRow = BGRAToYRow_LASX; - } - } -#endif -#if defined(HAS_BGRATOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - BGRAToYRow = BGRAToYRow_RVV; - } -#endif + return ARGBToI420Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kBgraI601Constants, + width, height); +} - for (y = 0; y < height - 1; y += 2) { - BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); - BGRAToYRow(src_bgra, dst_y, width); - BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width); - src_bgra += src_stride_bgra * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width); - BGRAToYRow(src_bgra, dst_y, width); - } - return 0; +// Convert BGRA to I422. 
+LIBYUV_API +int BGRAToI422(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return ARGBToI422Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kBgraI601Constants, + width, height); } // Convert ABGR to I420. @@ -2645,147 +2438,26 @@ int ABGRToI420(const uint8_t* src_abgr, int dst_stride_v, int width, int height) { - int y; - void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = - ABGRToYRow_C; - if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } -#if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToYRow = ABGRToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToYRow = ABGRToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToYRow = ABGRToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToYRow = ABGRToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToYRow = ABGRToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToUVRow = ABGRToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToUVRow = ABGRToUVRow_AVX2; - } - } -#endif -#if 
defined(HAS_ABGRTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ABGRToYRow = ABGRToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToUVRow = ABGRToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ABGRToUVRow = ABGRToUVRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ABGRToUVRow = ABGRToUVRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_SVE2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ABGRToUVRow = ABGRToUVRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_SME; - } - } -#endif -#if defined(HAS_ABGRTOYROW_LSX) && defined(HAS_ABGRTOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ABGRToYRow = ABGRToYRow_Any_LSX; - ABGRToUVRow = ABGRToUVRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_LSX; - ABGRToUVRow = ABGRToUVRow_LSX; - } - } -#endif -#if defined(HAS_ABGRTOYROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ABGRToYRow = ABGRToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ABGRToYRow = ABGRToYRow_LASX; - } - } -#endif -#if defined(HAS_ABGRTOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ABGRToYRow = ABGRToYRow_RVV; - } -#endif + return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kAbgrI601Constants, + width, height); +} - for (y = 0; y < height - 1; y += 2) { - ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); - 
ABGRToYRow(src_abgr, dst_y, width); - ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); - src_abgr += src_stride_abgr * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width); - ABGRToYRow(src_abgr, dst_y, width); - } - return 0; +// Convert ABGR to I422. +LIBYUV_API +int ABGRToI422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kAbgrI601Constants, + width, height); } // Convert RGBA to I420. @@ -2800,335 +2472,34 @@ int RGBAToI420(const uint8_t* src_rgba, int dst_stride_v, int width, int height) { - int y; - void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, - uint8_t* dst_u, uint8_t* dst_v, int width) = - RGBAToUVRow_C; - void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) = - RGBAToYRow_C; - if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_rgba = src_rgba + (height - 1) * src_stride_rgba; - src_stride_rgba = -src_stride_rgba; - } -#if defined(HAS_RGBATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToYRow = RGBAToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGBAToYRow = RGBAToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGBAToYRow = RGBAToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGBAToYRow = RGBAToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RGBAToYRow = RGBAToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RGBAToYRow = RGBAToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_RGBATOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_RGBATOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGBAToYRow = RGBAToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBAToYRow = RGBAToYRow_NEON; - } - } -#endif -#if defined(HAS_RGBATOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - RGBAToYRow = RGBAToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - RGBAToYRow = RGBAToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_RGBATOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGBAToUVRow = RGBAToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_NEON; - } - } -#endif -#if defined(HAS_RGBATOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - RGBAToUVRow = RGBAToUVRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_RGBATOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RGBAToUVRow = RGBAToUVRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - RGBAToUVRow = RGBAToUVRow_SVE2; - } - } -#endif -#if defined(HAS_RGBATOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - RGBAToUVRow = RGBAToUVRow_Any_SME; - if 
(IS_ALIGNED(width, 2)) { - RGBAToUVRow = RGBAToUVRow_SME; - } - } -#endif -#if defined(HAS_RGBATOYROW_LSX) && defined(HAS_RGBATOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGBAToYRow = RGBAToYRow_Any_LSX; - RGBAToUVRow = RGBAToUVRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGBAToYRow = RGBAToYRow_LSX; - RGBAToUVRow = RGBAToUVRow_LSX; - } - } -#endif -#if defined(HAS_RGBATOYROW_LASX) - if (TestCpuFlag(kCpuHasNEON)) { - RGBAToYRow = RGBAToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGBAToYRow = RGBAToYRow_LASX; - } - } -#endif -#if defined(HAS_RGBATOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGBAToYRow = RGBAToYRow_RVV; - } -#endif - - for (y = 0; y < height - 1; y += 2) { - RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); - RGBAToYRow(src_rgba, dst_y, width); - RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width); - src_rgba += src_stride_rgba * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width); - RGBAToYRow(src_rgba, dst_y, width); - } - return 0; + return ARGBToI420Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kRgbaI601Constants, + width, height); } -// Any RGB to I420 with Matrix -static int RGBToI420Matrix(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height, - const struct ArgbConstants* argbconstants, - void (*RGBToARGBRow)(const uint8_t* src_rgb, - uint8_t* dst_argb, - int width)) { - int y; - void (*ARGBToUVMatrixRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - -#if 
defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } -#endif - - if (!src_rgb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb = src_rgb + (height - 1) * src_stride_rgb; - src_stride_rgb = -src_stride_rgb; - } - - { - // Allocate 2 rows of ARGB. 
- const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - if (!row) - return 1; - - for (y = 0; y < height - 1; y += 2) { - RGBToARGBRow(src_rgb, row, width); - RGBToARGBRow(src_rgb + src_stride_rgb, row + row_size, width); - ARGBToUVMatrixRow(row, row_size, dst_u, dst_v, width, argbconstants); - ARGBToYMatrixRow(row, dst_y, width, argbconstants); - ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, argbconstants); - src_rgb += src_stride_rgb * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - RGBToARGBRow(src_rgb, row, width); - ARGBToUVMatrixRow(row, 0, dst_u, dst_v, width, argbconstants); - ARGBToYMatrixRow(row, dst_y, width, argbconstants); - } - free_aligned_buffer_64(row); - } - return 0; +// Convert RGBA to I422. +LIBYUV_API +int RGBAToI422(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return ARGBToI422Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kRgbaI601Constants, + width, height); } +// Enabled if 1 pass is available +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_LSX) || \ + defined(HAS_RGB24TOYROW_RVV)) +#define HAS_RGB24TOYROW +#endif + // Convert RGB24 to I420. 
LIBYUV_API int RGB24ToI420(const uint8_t* src_rgb24, @@ -3141,363 +2512,14 @@ int RGB24ToI420(const uint8_t* src_rgb24, int dst_stride_v, int width, int height) { - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RGB24ToARGBRow_C; -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RGB24ToARGBRow = RGB24ToARGBRow_SVE2; - } -#endif -#if defined(HAS_RGB24TOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToARGBRow = RGB24ToARGBRow_RVV; - } -#endif - - return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y, - dst_u, dst_stride_u, dst_v, dst_stride_v, width, - height, &kArgbI601Constants, RGB24ToARGBRow); -} - -// Convert RGB24 to J420. 
-LIBYUV_API -int RGB24ToJ420(const uint8_t* src_rgb24, - int src_stride_rgb24, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RGB24ToARGBRow_C; -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RGB24ToARGBRow = RGB24ToARGBRow_SVE2; - } -#endif -#if defined(HAS_RGB24TOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToARGBRow = RGB24ToARGBRow_RVV; - } -#endif - - return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y, - dst_u, dst_stride_u, dst_v, dst_stride_v, width, - height, &kArgbJPEGConstants, RGB24ToARGBRow); -} - -// Convert RAW 
to I420. -LIBYUV_API -int RAWToI420(const uint8_t* src_raw, - int src_stride_raw, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RAWToARGBRow = RAWToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToARGBRow = RAWToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RAWToARGBRow = RAWToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RAWToARGBRow = RAWToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToARGBRow = RAWToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RAWToARGBRow = RAWToARGBRow_SVE2; - } -#endif -#if defined(HAS_RAWTOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RAWToARGBRow = RAWToARGBRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_LSX; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RAWToARGBRow = RAWToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RAWToARGBRow = RAWToARGBRow_LASX; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RAWToARGBRow = RAWToARGBRow_RVV; - } -#endif - - return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, width, height, - &kArgbI601Constants, RAWToARGBRow); -} - -// Convert RAW to J420. 
-LIBYUV_API -int RAWToJ420(const uint8_t* src_raw, - int src_stride_raw, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RAWToARGBRow = RAWToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToARGBRow = RAWToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RAWToARGBRow = RAWToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RAWToARGBRow = RAWToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToARGBRow = RAWToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RAWToARGBRow = RAWToARGBRow_SVE2; - } -#endif -#if defined(HAS_RAWTOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RAWToARGBRow = RAWToARGBRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_LSX; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RAWToARGBRow = RAWToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RAWToARGBRow = RAWToARGBRow_LASX; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RAWToARGBRow = RAWToARGBRow_RVV; - } -#endif - - return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, width, height, - &kArgbJPEGConstants, RAWToARGBRow); -} - - -// RAW big endian (rgb in memory) to I444 -static int RGBToI444Matrix(const uint8_t* src_rgb, - int 
src_stride_rgb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height, - const struct ArgbConstants* argbconstants, - void (*RGBToARGBRow)(const uint8_t* src_rgb, - uint8_t* dst_argb, - int width)) { int y; + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RGB24ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - void (*ARGBToUV444MatrixRow)(const uint8_t* src_argb, uint8_t* dst_u, - uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUV444MatrixRow_C; - -#if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUV444MATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUV444MATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUV444MATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_NEON; - } - } -#endif - #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; @@ -3560,38 +2582,363 @@ static int RGBToI444Matrix(const uint8_t* src_rgb, } #endif - if (!src_rgb || 
!dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; - src_rgb = src_rgb + (height - 1) * src_stride_rgb; - src_stride_rgb = -src_stride_rgb; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; } + +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RGB24ToARGBRow = RGB24ToARGBRow_SVE2; + } +#endif +#if defined(HAS_RGB24TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToARGBRow = RGB24ToARGBRow_RVV; + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = 
ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVRow = ARGBToUVRow_AVX512BW; + } + } +#endif + { - // Allocate a row of ARGB. - const int row_size = width * 4; - align_buffer_64(row, row_size); + // Allocate 2 rows of ARGB. + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); if (!row) return 1; - for (y = 0; y < height; ++y) { - RGBToARGBRow(src_rgb, row, width); - ARGBToUV444MatrixRow(row, dst_u, dst_v, width, argbconstants); - ARGBToYMatrixRow(row, dst_y, width, argbconstants); - src_rgb += src_stride_rgb; - dst_y += dst_stride_y; + for (y = 0; y < height - 1; y += 2) { + RGB24ToARGBRow(src_rgb24, row, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); + ARGBToYMatrixRow(row, dst_y, width, &kArgbI601Constants); + ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, &kArgbI601Constants); + src_rgb24 += src_stride_rgb24 * 2; + dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } + if (height & 1) { + RGB24ToARGBRow(src_rgb24, row, width); + ARGBToUVRow(row, 0, dst_u, dst_v, width); + ARGBToYMatrixRow(row, dst_y, width, &kArgbI601Constants); + } free_aligned_buffer_64(row); } return 0; } +#undef HAS_RGB24TOYROW -// 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444 +// Enabled if 1 pass is available +#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_RVV) +#define HAS_RGB24TOYJROW +#endif + +// Convert RGB24 to J420. 
LIBYUV_API -int RAWToI444(const uint8_t* src_raw, +int RGB24ToJ420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int y; +#if defined(HAS_RGB24TOYJROW) + void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGB24ToUVJRow_C; + void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = + RGB24ToYJRow_C; +#else + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RGB24ToARGBRow_C; + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYJRow_C; +#endif + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; + } + +#if defined(HAS_RGB24TOYJROW) + +// Neon version does direct RGB24 to YUV. 
+#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; + RGB24ToYJRow = RGB24ToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_NEON; + RGB24ToUVJRow = RGB24ToUVJRow_NEON; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToYJRow = RGB24ToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToYJRow = RGB24ToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToYJRow = RGB24ToYJRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToYJRow = RGB24ToYJRow_RVV; + } +#endif + +// Other platforms do intermediate conversion from RGB24 to ARGB. +#else // HAS_RGB24TOYJROW + +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RGB24ToARGBRow = RGB24ToARGBRow_SVE2; + } +#endif +#if defined(HAS_RGB24TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) 
{ + RGB24ToARGBRow = RGB24ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToARGBRow = RGB24ToARGBRow_RVV; + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYJRow = ARGBToYJRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif +#endif // HAS_RGB24TOYJROW + + { +#if !defined(HAS_RGB24TOYJROW) + // Allocate 2 rows of ARGB. 
+ const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; +#endif + + for (y = 0; y < height - 1; y += 2) { +#if defined(HAS_RGB24TOYJROW) + RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); + RGB24ToYJRow(src_rgb24, dst_y, width); + RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); +#else + RGB24ToARGBRow(src_rgb24, row, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); +#endif + src_rgb24 += src_stride_rgb24 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if defined(HAS_RGB24TOYJROW) + RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); + RGB24ToYJRow(src_rgb24, dst_y, width); +#else + RGB24ToARGBRow(src_rgb24, row, width); + ARGBToUVJRow(row, 0, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); +#endif + } +#if !defined(HAS_RGB24TOYJROW) + free_aligned_buffer_64(row); +#endif + } + return 0; +} +#undef HAS_RGB24TOYJROW + +// Enabled if 1 pass is available +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_LSX) || \ + defined(HAS_RAWTOYROW_RVV)) +#define HAS_RAWTOYROW +#endif + +// Convert RAW to I420. 
+LIBYUV_API +int RAWToI420(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y, int dst_stride_y, @@ -3601,8 +2948,87 @@ int RAWToI444(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, - int width) = RAWToARGBRow_C; + int y; + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } 
+#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + + + if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -3661,10 +3087,507 @@ int RAWToI444(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif - return RGBToI444Matrix(src_raw, src_stride_raw, - dst_y, dst_stride_y, dst_u, dst_stride_u, - dst_v, dst_stride_v, width, height, - &kArgbI601Constants, RAWToARGBRow); +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVRow = ARGBToUVRow_AVX512BW; + } + } +#endif + + { + // Allocate 2 rows of ARGB. 
+ const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; + + for (y = 0; y < height - 1; y += 2) { + RAWToARGBRow(src_raw, row, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); + ARGBToYMatrixRow(row, dst_y, width, &kArgbI601Constants); + ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, &kArgbI601Constants); + src_raw += src_stride_raw * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + RAWToARGBRow(src_raw, row, width); + ARGBToUVRow(row, 0, dst_u, dst_v, width); + ARGBToYMatrixRow(row, dst_y, width, &kArgbI601Constants); + } + free_aligned_buffer_64(row); + } + return 0; +} +#undef HAS_RAWTOYROW + +// Enabled if 1 pass is available +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_RVV) +#define HAS_RAWTOYJROW +#endif + +// Convert RAW to J420. +LIBYUV_API +int RAWToJ420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int y; +#if defined(HAS_RAWTOYJROW) + void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RAWToUVJRow_C; + void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = + RAWToYJRow_C; +#else + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYJRow_C; +#endif + if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + +#if defined(HAS_RAWTOYJROW) + +// Neon version does direct RAW to YUV. +#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToUVJRow = RAWToUVJRow_Any_NEON; + RAWToYJRow = RAWToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_NEON; + RAWToUVJRow = RAWToUVJRow_NEON; + } + } +#endif +#if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToUVJRow = RAWToUVJRow_Any_LSX; + RAWToYJRow = RAWToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_LSX; + RAWToUVJRow = RAWToUVJRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_LASX) && defined(HAS_RAWTOUVJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToUVJRow = RAWToUVJRow_Any_LASX; + RAWToYJRow = RAWToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYJRow = RAWToYJRow_LASX; + RAWToUVJRow = RAWToUVJRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToYJRow = RAWToYJRow_RVV; + } +#endif + +// Other platforms do intermediate conversion from RAW to ARGB. 
+#else // HAS_RAWTOYJROW + +#if defined(HAS_RAWTOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RAWToARGBRow = RAWToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RAWToARGBRow = RAWToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RAWToARGBRow = RAWToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RAWToARGBRow = RAWToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToARGBRow = RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RAWToARGBRow = RAWToARGBRow_SVE2; + } +#endif +#if defined(HAS_RAWTOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToARGBRow = RAWToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToARGBRow = RAWToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToARGBRow = RAWToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToARGBRow = RAWToARGBRow_RVV; + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + } + 
} +#endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif +#endif // HAS_RAWTOYJROW + + { +#if !defined(HAS_RAWTOYJROW) + // Allocate 2 rows of ARGB. + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; +#endif + + for (y = 0; y < height - 1; y += 2) { +#if defined(HAS_RAWTOYJROW) + RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width); + RAWToYJRow(src_raw, dst_y, width); + RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); +#else + RAWToARGBRow(src_raw, row, width); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); +#endif + src_raw += src_stride_raw * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if defined(HAS_RAWTOYJROW) + RAWToUVJRow(src_raw, 0, dst_u, dst_v, width); + RAWToYJRow(src_raw, dst_y, width); +#else + RAWToARGBRow(src_raw, row, width); + ARGBToUVJRow(row, 0, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); +#endif + } +#if !defined(HAS_RAWTOYJROW) + free_aligned_buffer_64(row); +#endif + } + return 0; +} +#undef HAS_RAWTOYJROW + +// RAW big endian (rgb in memory) to I444 +// 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444 +LIBYUV_API +int RAWToI444(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int y; + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = + ARGBToYRow_C; + void (*ARGBToUV444Row)(const uint8_t* src_raw, uint8_t* dst_u, 
uint8_t* dst_v, + int width) = ARGBToUV444Row_C; + if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + // TODO: add row coalesce when main loop handles large width in blocks + // TODO: implement UV444 or trim the ifdef below +#if defined(HAS_ARGBTOUV444ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUV444ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUV444Row = ARGBToUV444Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUV444Row = ARGBToUV444Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUV444ROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUV444Row = ARGBToUV444Row_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUV444Row = ARGBToUV444Row_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUV444ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUV444Row = ARGBToUV444Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToUV444Row = ARGBToUV444Row_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUV444ROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUV444Row = ARGBToUV444Row_Any_NEON_I8MM; + if (IS_ALIGNED(width, 8)) { + ARGBToUV444Row = ARGBToUV444Row_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUV444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUV444Row = ARGBToUV444Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUV444ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUV444Row = ARGBToUV444Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUV444Row = ARGBToUV444Row_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + 
ARGBToYRow = ARGBToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYRow = ARGBToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYRow = ARGBToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYRow = ARGBToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif + +#if defined(HAS_RAWTOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RAWToARGBRow = RAWToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RAWToARGBRow = RAWToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RAWToARGBRow = RAWToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RAWToARGBRow = RAWToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToARGBRow = 
RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RAWToARGBRow = RAWToARGBRow_SVE2; + } +#endif +#if defined(HAS_RAWTOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToARGBRow = RAWToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToARGBRow = RAWToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToARGBRow = RAWToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToARGBRow = RAWToARGBRow_RVV; + } +#endif + + { + // Allocate a row of ARGB. + const int row_size = width * 4; + align_buffer_64(row, row_size); + if (!row) + return 1; + + for (y = 0; y < height; ++y) { + RAWToARGBRow(src_raw, row, width); + ARGBToUV444Row(row, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); + src_raw += src_stride_raw; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + free_aligned_buffer_64(row); + } + return 0; } // RAW big endian (rgb in memory) to J444 @@ -3680,8 +3603,132 @@ int RAWToJ444(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, - int width) = RAWToARGBRow_C; + int y; + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = + ARGBToYJRow_C; + void (*ARGBToUVJ444Row)(const uint8_t* src_raw, uint8_t* dst_u, + uint8_t* dst_v, int width) = ARGBToUVJ444Row_C; + if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + // TODO: add row coalesce when main loop handles large 
width in blocks +#if defined(HAS_ARGBTOUVJ444ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVJ444ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVJ444ROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVJ444ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVJ444ROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_NEON_I8MM; + if (IS_ALIGNED(width, 8)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVJ444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVJ444ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if 
(IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYJRow = ARGBToYJRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYJRow = ARGBToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYJRow = ARGBToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif + #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -3740,10 +3787,26 @@ int RAWToJ444(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif - return RGBToI444Matrix(src_raw, src_stride_raw, - dst_y, dst_stride_y, dst_u, dst_stride_u, - dst_v, dst_stride_v, width, height, - &kArgbJPEGConstants, RAWToARGBRow); + + { + // Allocate a row of ARGB. + const int row_size = width * 4; + align_buffer_64(row, row_size); + if (!row) + return 1; + + for (y = 0; y < height; ++y) { + RAWToARGBRow(src_raw, row, width); + ARGBToUVJ444Row(row, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); + src_raw += src_stride_raw; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + free_aligned_buffer_64(row); + } + return 0; } // Convert RGB565 to I420. 
@@ -3758,8 +3821,73 @@ int RGB565ToI420(const uint8_t* src_rgb565, int dst_stride_v, int width, int height) { + int y; +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) + void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGB565ToUVRow_C; + void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) = + RGB565ToYRow_C; +#else void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB565ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYRow_C; +#endif + if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; + src_stride_rgb565 = -src_stride_rgb565; + } + +// Neon version does direct RGB565 to YUV. +#if defined(HAS_RGB565TOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB565ToYRow = RGB565ToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGB565ToYRow = RGB565ToYRow_NEON; + } + } +#endif +// Neon version does direct RGB565 to YUV. +#if defined(HAS_RGB565TOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB565ToUVRow = RGB565ToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGB565ToUVRow = RGB565ToUVRow_NEON; + } + } +#endif +// LSX version does direct RGB565 to YUV. 
+#if defined(HAS_RGB565TOYROW_LSX) && defined(HAS_RGB565TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB565ToUVRow = RGB565ToUVRow_Any_LSX; + RGB565ToYRow = RGB565ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB565ToYRow = RGB565ToYRow_LSX; + RGB565ToUVRow = RGB565ToUVRow_LSX; + } + } +#endif +#if defined(HAS_RGB565TOYROW_LASX) && defined(HAS_RGB565TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB565ToUVRow = RGB565ToUVRow_Any_LASX; + RGB565ToYRow = RGB565ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB565ToYRow = RGB565ToYRow_LASX; + RGB565ToUVRow = RGB565ToUVRow_LASX; + } + } +#endif +// Other platforms do intermediate conversion from RGB565 to ARGB. #if defined(HAS_RGB565TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; @@ -3776,10 +3904,98 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } #endif - return RGBToI420Matrix(src_rgb565, src_stride_rgb565, - dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, - dst_stride_v, width, height, - &kArgbI601Constants, RGB565ToARGBRow); +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYRow = ARGBToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYRow = ARGBToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif 
+#if defined(HAS_ARGBTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVRow = ARGBToUVRow_AVX512BW; + } + } +#endif + { +#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) + // Allocate 2 rows of ARGB. + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; +#endif + for (y = 0; y < height - 1; y += 2) { +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) + RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); + RGB565ToYRow(src_rgb565, dst_y, width); + RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); +#else + RGB565ToARGBRow(src_rgb565, row, width); + RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); +#endif + src_rgb565 += src_stride_rgb565 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) + RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); + RGB565ToYRow(src_rgb565, dst_y, width); +#else + RGB565ToARGBRow(src_rgb565, row, width); + ARGBToUVRow(row, 0, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); +#endif + } +#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) + free_aligned_buffer_64(row); +#endif + } + return 0; } // Convert ARGB1555 to I420. 
@@ -3794,8 +4010,75 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, int dst_stride_v, int width, int height) { + int y; +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) + void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGB1555ToUVRow_C; + void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y, + int width) = ARGB1555ToYRow_C; +#else void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, - int width) = ARGB1555ToARGBRow_C; + int width) = ARGB1555ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYRow_C; +#endif + if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; + src_stride_argb1555 = -src_stride_argb1555; + } + +// Neon version does direct ARGB1555 to YUV. +#if defined(HAS_ARGB1555TOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGB1555ToYRow = ARGB1555ToYRow_NEON; + } + } +#endif +#if defined(HAS_ARGB1555TOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_NEON; + } + } +#endif + +// LSX version does direct ARGB1555 to YUV. 
+#if defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX; + ARGB1555ToYRow = ARGB1555ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGB1555ToYRow = ARGB1555ToYRow_LSX; + ARGB1555ToUVRow = ARGB1555ToUVRow_LSX; + } + } +#endif +#if defined(HAS_ARGB1555TOYROW_LASX) && defined(HAS_ARGB1555TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LASX; + ARGB1555ToYRow = ARGB1555ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGB1555ToYRow = ARGB1555ToYRow_LASX; + ARGB1555ToUVRow = ARGB1555ToUVRow_LASX; + } + } +#endif + +// Other platforms do intermediate conversion from ARGB1555 to ARGB. #if defined(HAS_ARGB1555TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; @@ -3812,10 +4095,101 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } #endif - return RGBToI420Matrix(src_argb1555, src_stride_argb1555, - dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, - dst_stride_v, width, height, - &kArgbI601Constants, ARGB1555ToARGBRow); +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYRow = ARGBToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYRow = ARGBToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if 
(IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVRow = ARGBToUVRow_AVX512BW; + } + } +#endif + { +#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) + // Allocate 2 rows of ARGB. + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; +#endif + + for (y = 0; y < height - 1; y += 2) { +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) + ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); + ARGB1555ToYRow(src_argb1555, dst_y, width); + ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, + width); +#else + ARGB1555ToARGBRow(src_argb1555, row, width); + ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size, + width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); +#endif + src_argb1555 += src_stride_argb1555 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) + ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); + ARGB1555ToYRow(src_argb1555, dst_y, width); +#else + ARGB1555ToARGBRow(src_argb1555, row, width); + ARGBToUVRow(row, 0, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); +#endif + } +#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) + free_aligned_buffer_64(row); +#endif + } + return 0; } // Convert ARGB4444 to I420. 
@@ -3830,8 +4204,50 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, int dst_stride_v, int width, int height) { + int y; +#if defined(HAS_ARGB4444TOYROW_NEON) + void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGB4444ToUVRow_C; + void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y, + int width) = ARGB4444ToYRow_C; +#else void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, - int width) = ARGB4444ToARGBRow_C; + int width) = ARGB4444ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYRow_C; +#endif + if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; + src_stride_argb4444 = -src_stride_argb4444; + } + +// Neon version does direct ARGB4444 to YUV. 
+#if defined(HAS_ARGB4444TOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGB4444ToYRow = ARGB4444ToYRow_NEON; + } + } +#endif +#if defined(HAS_ARGB4444TOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToUVRow = ARGB4444ToUVRow_NEON; + } + } +#endif #if defined(HAS_ARGB4444TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; @@ -3864,28 +4280,141 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif - return RGBToI420Matrix(src_argb4444, src_stride_argb4444, - dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, - dst_stride_v, width, height, - &kArgbI601Constants, ARGB4444ToARGBRow); +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYRow = ARGBToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYRow = ARGBToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVRow = ARGBToUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) + if 
(TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; + } + } +#endif + + { +#if !(defined(HAS_ARGB4444TOYROW_NEON)) + // Allocate 2 rows of ARGB. + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; +#endif + + for (y = 0; y < height - 1; y += 2) { +#if defined(HAS_ARGB4444TOYROW_NEON) + ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); + ARGB4444ToYRow(src_argb4444, dst_y, width); + ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, + width); +#else + ARGB4444ToARGBRow(src_argb4444, row, width); + ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size, + width); + ARGBToUVRow(row, row_size, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); + ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); +#endif + src_argb4444 += src_stride_argb4444 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if defined(HAS_ARGB4444TOYROW_NEON) + ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); + ARGB4444ToYRow(src_argb4444, dst_y, width); +#else + ARGB4444ToARGBRow(src_argb4444, row, width); + ARGBToUVRow(row, 0, dst_u, dst_v, width); + ARGBToYRow(row, dst_y, width); +#endif + } +#if !(defined(HAS_ARGB4444TOYROW_NEON)) + free_aligned_buffer_64(row); +#endif + } + return 
0; } - - -static int RGBToI400Matrix(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height, - const struct ArgbConstants* argbconstants, - void (*RGBToARGBRow)(const uint8_t* src_rgb, - uint8_t* dst_argb, - int width)) { +// Convert RGB24 to J400. +LIBYUV_API +int RGB24ToJ400(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height) { int y; + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RGB24ToARGBRow_C; void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; @@ -3948,17 +4477,79 @@ static int RGBToI400Matrix(const uint8_t* src_rgb, } #endif - if (!src_rgb || !dst_y || width <= 0 || height == 0) { + if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; - src_rgb = src_rgb + (height - 1) * src_stride_rgb; - src_stride_rgb = -src_stride_rgb; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; } - - { + // Coalesce rows. 
+ if (src_stride_rgb24 == width * 3 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_rgb24 = dst_stride_yj = 0; + } +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RGB24ToARGBRow = RGB24ToARGBRow_SVE2; + } +#endif +#if defined(HAS_RGB24TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToARGBRow = RGB24ToARGBRow_RVV; + } +#endif +{ // Allocate 1 row of ARGB. 
const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size); @@ -3966,10 +4557,10 @@ static int RGBToI400Matrix(const uint8_t* src_rgb, return 1; for (y = 0; y < height; ++y) { - RGBToARGBRow(src_rgb, row, width); - ARGBToYMatrixRow(row, dst_y, width, argbconstants); - src_rgb += src_stride_rgb; - dst_y += dst_stride_y; + RGB24ToARGBRow(src_rgb24, row, width); + ARGBToYMatrixRow(row, dst_yj, width, &kArgbJPEGConstants); + src_rgb24 += src_stride_rgb24; + dst_yj += dst_stride_yj; } free_aligned_buffer_64(row); } @@ -3984,8 +4575,89 @@ int RAWToJ400(const uint8_t* src_raw, int dst_stride_yj, int width, int height) { - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, - int width) = RAWToARGBRow_C; + int y; + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if 
(IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + + + if (!src_raw || !dst_yj || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + // Coalesce rows. + if (src_stride_raw == width * 3 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_raw = dst_stride_yj = 0; + } + #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -4044,9 +4716,23 @@ int RAWToJ400(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif - return RGBToI400Matrix(src_raw, src_stride_raw, - dst_yj, dst_stride_yj, width, height, - &kArgbJPEGConstants, RAWToARGBRow); + + { + // Allocate 1 row of ARGB. + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size); + if (!row) + return 1; + + for (y = 0; y < height; ++y) { + RAWToARGBRow(src_raw, row, width); + ARGBToYMatrixRow(row, dst_yj, width, &kArgbJPEGConstants); + src_raw += src_stride_raw; + dst_yj += dst_stride_yj; + } + free_aligned_buffer_64(row); + } + return 0; } // Convert Android420 to I420. 
@@ -4204,82 +4890,7 @@ int J420ToI420(const uint8_t* src_y, 1, 220, 16, 225, 16); } -LIBYUV_API -int RGB24ToJ400(const uint8_t* src_rgb24, - int src_stride_rgb24, - uint8_t* dst_yj, - int dst_stride_yj, - int width, - int height) { - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, - int width) = RGB24ToARGBRow_C; -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RGB24ToARGBRow = RGB24ToARGBRow_SVE2; - } -#endif -#if defined(HAS_RGB24TOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToARGBRow = RGB24ToARGBRow_RVV; - } -#endif - return RGBToI400Matrix(src_rgb24, src_stride_rgb24, - dst_yj, dst_stride_yj, width, height, - &kArgbJPEGConstants, RGB24ToARGBRow); -} - #ifdef __cplusplus } // extern "C" } // namespace 
libyuv #endif - - - diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index a139c1d20..71a616550 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -24,17 +24,17 @@ extern "C" { LIBYUV_API int ARGBToI444(const uint8_t* src_argb, int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, + uint8_t* dst_y, + int dst_stride_y, int width, int height) { - return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kArgbI601Constants, width, height); + return ARGBToI444Matrix(src_argb, src_stride_argb, dst_u, dst_stride_u, dst_v, + dst_stride_v, dst_y, dst_stride_y, &kArgbI601Constants, + width, height); } LIBYUV_API @@ -186,8 +186,8 @@ int ARGBToI422(const uint8_t* src_argb, int width, int height) { return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kArgbI601Constants, width, height); + dst_stride_u, dst_v, dst_stride_v, &kArgbI601Constants, + width, height); } LIBYUV_API @@ -272,10 +272,40 @@ ARGBToUVMatrixRow_C; } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif #if 
defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } @@ -283,64 +313,10 @@ ARGBToUVMatrixRow_C; #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; } } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } #endif if (!src_argb || !dst_y || !dst_u || 
!dst_v || !argbconstants || width <= 0 || height == 0) { @@ -458,10 +434,40 @@ ARGBToUVMatrixRow_C; } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } @@ -469,64 +475,10 @@ ARGBToUVMatrixRow_C; #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; } } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } #endif void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; @@ -617,27 +569,136 @@ ARGBToUVMatrixRow_C; return 0; } -// Same as NV12 but U and V swapped. 
-LIBYUV_API -int ARGBToNV21(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { +int ARGBToNV21Matrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_uv, + const struct ArgbConstants* argbconstants, + int width, + int height) { int y; int halfwidth = (width + 1) >> 1; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; +ARGBToUVMatrixRow_C; + +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = 
ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, - uint8_t* dst_vu, int width) = MergeUVRow_C; - if (!src_argb || !dst_y || !dst_vu || 
width <= 0 || height == 0) { + uint8_t* dst_vu, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_vu || !argbconstants || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -646,14 +707,101 @@ int ARGBToNV21(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; +#if defined(HAS_MERGEUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeUVRow = MergeUVRow_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_SSE2; } } #endif +#if defined(HAS_MERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow = MergeUVRow_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_MERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow = MergeUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_NEON; + } + } +#endif +#if defined(HAS_MERGEUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + MergeUVRow = MergeUVRow_SME; + } +#endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif + + // Allocate a rows of uv. 
+ align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; + + for (y = 0; y < height - 1; y += 2) { + ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, + argbconstants); + MergeUVRow(row_u, row_v, dst_vu, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, + argbconstants); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_vu += dst_stride_uv; + } + if (height & 1) { + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); + MergeUVRow(row_u, row_v, dst_vu, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + } + free_aligned_buffer_64(row_u); + return 0; +} +LIBYUV_API +int ARGBToI400Matrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + const struct ArgbConstants* constants, + int width, + int height) { + int y; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + if (!src_argb || !dst_y || !constants || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } #if defined(HAS_ARGBTOYMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; @@ -686,648 +834,40 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if 
defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - 
} - } -#endif -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - MergeUVRow = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_SSE2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 64)) { - MergeUVRow = MergeUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - MergeUVRow = MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_NEON; - } - } -#endif -#if defined(HAS_MERGEUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - MergeUVRow = MergeUVRow_SME; - } -#endif -#if defined(HAS_MERGEUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - MergeUVRow = MergeUVRow_Any_LSX; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_LSX; - } - } -#endif -#if defined(HAS_MERGEUVROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - MergeUVRow = MergeUVRow_RVV; - } -#endif - { - // Allocate a rows of uv. 
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); - if (!row_u) - return 1; - for (y = 0; y < height - 1; y += 2) { - ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, &kArgbI601Constants); - MergeUVRow(row_v, row_u, dst_vu, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, &kArgbI601Constants); - ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, &kArgbI601Constants); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_vu += dst_stride_vu; - } - if (height & 1) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, &kArgbI601Constants); - MergeUVRow(row_v, row_u, dst_vu, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, &kArgbI601Constants); - } - free_aligned_buffer_64(row_u); + for (y = 0; y < height; ++y) { + ARGBToYMatrixRow(src_argb, dst_y, width, constants); + src_argb += src_stride_argb; + dst_y += dst_stride_y; } return 0; } - LIBYUV_API -int ABGRToNV12(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_uv, - int dst_stride_uv, - int width, - int height) { - int y; - int halfwidth = (width + 1) >> 1; - void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = - ABGRToYRow_C; - void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, - uint8_t* dst_uv, int width) = MergeUVRow_C; - if (!src_abgr || !dst_y || !dst_uv || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } -#if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToYRow = ABGRToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToYRow = ABGRToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToYRow = ABGRToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToYRow = ABGRToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToYRow = ABGRToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToUVRow = ABGRToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToUVRow = ABGRToUVRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToUVRow = ABGRToUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToUVRow = ABGRToUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ABGRToYRow = ABGRToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToUVRow = ABGRToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ABGRToUVRow = ABGRToUVRow_Any_NEON_I8MM; - 
if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ABGRToUVRow = ABGRToUVRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_SVE2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ABGRToUVRow = ABGRToUVRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_SME; - } - } -#endif -#if defined(HAS_ABGRTOYROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ABGRToYRow = ABGRToYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_LSX; - } - } -#endif -#if defined(HAS_ABGRTOYROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ABGRToYRow = ABGRToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ABGRToYRow = ABGRToYRow_LASX; - } - } -#endif -#if defined(HAS_ABGRTOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ABGRToYRow = ABGRToYRow_RVV; - } -#endif -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - MergeUVRow = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_SSE2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 64)) { - MergeUVRow = MergeUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - MergeUVRow = MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_NEON; - } - } -#endif -#if defined(HAS_MERGEUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - MergeUVRow = MergeUVRow_SME; - } -#endif -#if defined(HAS_MERGEUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - MergeUVRow = MergeUVRow_Any_LSX; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_LSX; - } - } -#endif -#if 
defined(HAS_MERGEUVROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - MergeUVRow = MergeUVRow_RVV; - } -#endif - { - // Allocate a rows of uv. - align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); - if (!row_u) - return 1; - - for (y = 0; y < height - 1; y += 2) { - ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); - MergeUVRow(row_u, row_v, dst_uv, halfwidth); - ABGRToYRow(src_abgr, dst_y, width); - ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); - src_abgr += src_stride_abgr * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; - } - if (height & 1) { - ABGRToUVRow(src_abgr, 0, row_u, row_v, width); - MergeUVRow(row_u, row_v, dst_uv, halfwidth); - ABGRToYRow(src_abgr, dst_y, width); - } - free_aligned_buffer_64(row_u); - } - return 0; -} - -// Same as NV12 but U and V swapped. -LIBYUV_API -int ABGRToNV21(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { - int y; - int halfwidth = (width + 1) >> 1; - void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = - ABGRToYRow_C; - void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, - uint8_t* dst_vu, int width) = MergeUVRow_C; - if (!src_abgr || !dst_y || !dst_vu || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } -#if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToYRow = ABGRToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToYRow = ABGRToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToYRow = ABGRToYRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToUVRow = ABGRToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToUVRow = ABGRToUVRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToUVRow = ABGRToUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToUVRow = ABGRToUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ABGRToYRow = ABGRToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToUVRow = ABGRToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ABGRToUVRow = ABGRToUVRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ABGRToUVRow = ABGRToUVRow_Any_SVE2; - if 
(IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_SVE2; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ABGRToUVRow = ABGRToUVRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_SME; - } - } -#endif -#if defined(HAS_ABGRTOYROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ABGRToYRow = ABGRToYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_LSX; - } - } -#endif -#if defined(HAS_ABGRTOYROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ABGRToYRow = ABGRToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ABGRToYRow = ABGRToYRow_LASX; - } - } -#endif -#if defined(HAS_ABGRTOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ABGRToYRow = ABGRToYRow_RVV; - } -#endif -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - MergeUVRow = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_SSE2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 64)) { - MergeUVRow = MergeUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - MergeUVRow = MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_NEON; - } - } -#endif -#if defined(HAS_MERGEUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - MergeUVRow = MergeUVRow_SME; - } -#endif -#if defined(HAS_MERGEUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - MergeUVRow = MergeUVRow_Any_LSX; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_LSX; - } - } -#endif -#if defined(HAS_MERGEUVROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - MergeUVRow = MergeUVRow_RVV; - } -#endif - { - // Allocate a rows of uv. 
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); - if (!row_u) - return 1; - - for (y = 0; y < height - 1; y += 2) { - ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); - MergeUVRow(row_v, row_u, dst_vu, halfwidth); - ABGRToYRow(src_abgr, dst_y, width); - ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); - src_abgr += src_stride_abgr * 2; - dst_y += dst_stride_y * 2; - dst_vu += dst_stride_vu; - } - if (height & 1) { - ABGRToUVRow(src_abgr, 0, row_u, row_v, width); - MergeUVRow(row_v, row_u, dst_vu, halfwidth); - ABGRToYRow(src_abgr, dst_y, width); - } - free_aligned_buffer_64(row_u); - } - return 0; -} - -// Convert ARGB to YUY2. -LIBYUV_API -int ARGBToYUY2(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yuy2, - int dst_stride_yuy2, - int width, - int height) { +int ARGBToYUY2Matrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + const struct ArgbConstants* constants, + int width, + int height) { int y; void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; + const struct ArgbConstants* c) = ARGBToUVMatrixRow_C; void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; - if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) { + if (!src_argb || !dst_yuy2 || !constants || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_yuy2 == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_yuy2 = 0; - } -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif #if defined(HAS_ARGBTOYMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; @@ -1352,43 +892,6 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -1413,52 +916,6 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif 
-#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } -#endif -#if defined(HAS_I422TOYUY2ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_SSE2; - } - } -#endif #if defined(HAS_I422TOYUY2ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; @@ -1475,25 +932,8 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif -#if defined(HAS_I422TOYUY2ROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - I422ToYUY2Row = I422ToYUY2Row_Any_LSX; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_LSX; - } - } -#endif -#if defined(HAS_I422TOYUY2ROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - I422ToYUY2Row = I422ToYUY2Row_Any_LASX; - if (IS_ALIGNED(width, 32)) { - I422ToYUY2Row = I422ToYUY2Row_LASX; - } - } -#endif { - // Allocate a rows of yuv. 
align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; @@ -1501,8 +941,8 @@ int ARGBToYUY2(const uint8_t* src_argb, return 1; for (y = 0; y < height; ++y) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, &kArgbI601Constants); - ARGBToYMatrixRow(src_argb, row_y, width, &kArgbI601Constants); + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, constants); + ARGBToYMatrixRow(src_argb, row_y, width, constants); I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); src_argb += src_stride_argb; dst_yuy2 += dst_stride_yuy2; @@ -1513,48 +953,32 @@ int ARGBToYUY2(const uint8_t* src_argb, return 0; } -// Convert ARGB to UYVY. LIBYUV_API -int ARGBToUYVY(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_uyvy, - int dst_stride_uyvy, - int width, - int height) { +int ARGBToUYVYMatrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + const struct ArgbConstants* constants, + int width, + int height) { int y; void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; + const struct ArgbConstants* c) = ARGBToUVMatrixRow_C; void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; - if (!src_argb || !dst_uyvy || width <= 0 || height == 0) { + if (!src_argb || !dst_uyvy || !constants || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_uyvy == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_uyvy = 0; - } -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif #if defined(HAS_ARGBTOYMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; @@ -1579,43 +1003,6 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -1640,52 +1027,6 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } 
-#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; - } - } -#endif -#if defined(HAS_I422TOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_SSE2; - } - } -#endif #if defined(HAS_I422TOUYVYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; @@ -1702,25 +1043,8 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif -#if defined(HAS_I422TOUYVYROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - I422ToUYVYRow = I422ToUYVYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_LSX; - } - } -#endif -#if defined(HAS_I422TOUYVYROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - I422ToUYVYRow = I422ToUYVYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - I422ToUYVYRow = I422ToUYVYRow_LASX; - } - } -#endif { - // Allocate a rows of yuv. 
align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; @@ -1728,8 +1052,8 @@ int ARGBToUYVY(const uint8_t* src_argb, return 1; for (y = 0; y < height; ++y) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, &kArgbI601Constants); - ARGBToYMatrixRow(src_argb, row_y, width, &kArgbI601Constants); + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, constants); + ARGBToYMatrixRow(src_argb, row_y, width, constants); I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); src_argb += src_stride_argb; dst_uyvy += dst_stride_uyvy; @@ -1740,16 +1064,74 @@ int ARGBToUYVY(const uint8_t* src_argb, return 0; } -// Convert ARGB to I400. -LIBYUV_API -int ARGBToI400Matrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - const struct ArgbConstants* argbconstants, - int width, - int height); + +// Same as NV12 but U and V swapped. +LIBYUV_API +int ARGBToNV21(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return ARGBToNV21Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, &kArgbI601Constants, width, height); +} + +LIBYUV_API +int ABGRToNV12(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + return ARGBToNV12Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_uv, + dst_stride_uv, &kAbgrI601Constants, width, height); +} + +// Same as NV12 but U and V swapped. +LIBYUV_API +int ABGRToNV21(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return ARGBToNV21Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, &kAbgrI601Constants, width, height); +} + +// Convert ARGB to YUY2. 
+LIBYUV_API +int ARGBToYUY2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height) { + return ARGBToYUY2Matrix(src_argb, src_stride_argb, dst_yuy2, dst_stride_yuy2, + &kArgbI601Constants, width, height); +} + +// Convert ARGB to UYVY. +LIBYUV_API +int ARGBToUYVY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height) { + return ARGBToUYVYMatrix(src_argb, src_stride_argb, dst_uyvy, dst_stride_uyvy, + &kArgbI601Constants, width, height); +} + +// Convert ARGB to I400. LIBYUV_API int ARGBToI400(const uint8_t* src_argb, int src_stride_argb, @@ -1760,100 +1142,6 @@ int ARGBToI400(const uint8_t* src_argb, return ARGBToI400Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, &kArgbI601Constants, width, height); } -LIBYUV_API -int ARGBToI400Matrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - const struct ArgbConstants* argbconstants, - int width, - int height) { - int y; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - if (!src_argb || !dst_y || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_y == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = 0; - } -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - } - return 0; -} #ifndef __riscv // Shuffle 
table for converting ARGB to RGBA. @@ -2517,10 +1805,68 @@ int ARGBToAR30(const uint8_t* src_argb, return 0; } +// ARGB little endian (bgra in memory) to J444 +LIBYUV_API +int ARGBToJ444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + return ARGBToI444Matrix(src_argb, src_stride_argb, dst_u, dst_stride_u, dst_v, + dst_stride_v, dst_y, dst_stride_y, &kArgbJPEGConstants, + width, height); +} +// Convert ARGB to J420. (JPeg full range I420). +LIBYUV_API +int ARGBToJ420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kArgbJPEGConstants, + width, height); +} // Convert ARGB to J422. (JPeg full range I422). +LIBYUV_API +int ARGBToJ422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kArgbJPEGConstants, + width, height); +} +// Convert ARGB to J400. +LIBYUV_API +int ARGBToJ400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + return ARGBToI400Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, + &kArgbJPEGConstants, width, height); +} // Convert RGBA to J400. 
LIBYUV_API @@ -2621,316 +1967,34 @@ int RGBAToJ400(const uint8_t* src_rgba, LIBYUV_API int ABGRToJ420(const uint8_t* src_abgr, int src_stride_abgr, - uint8_t* dst_yj, - int dst_stride_yj, - uint8_t* dst_uj, - int dst_stride_uj, - uint8_t* dst_vj, - int dst_stride_vj, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, int width, int height) { - int y; - void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = - ABGRToUVJRow_C; - void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = - ABGRToYJRow_C; - if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } -#if defined(HAS_ABGRTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToYJRow = ABGRToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToUVJRow = ABGRToUVJRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToYJRow = ABGRToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToYJRow = ABGRToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToYJRow = ABGRToYJRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToYJRow = ABGRToYJRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToUVJRow = ABGRToUVJRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToUVJRow = 
ABGRToUVJRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToUVJRow = ABGRToUVJRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToYJRow = ABGRToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ABGRToYJRow = ABGRToYJRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToUVJRow = ABGRToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToUVJRow = ABGRToUVJRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ABGRToUVJRow = ABGRToUVJRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ABGRToUVJRow = ABGRToUVJRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ABGRToUVJRow = ABGRToUVJRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - ABGRToUVJRow = ABGRToUVJRow_SVE2; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ABGRToUVJRow = ABGRToUVJRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ABGRToUVJRow = ABGRToUVJRow_SME; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ABGRToYJRow = ABGRToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_LSX; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ABGRToYJRow = ABGRToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ABGRToYJRow = ABGRToYJRow_LASX; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ABGRToYJRow = ABGRToYJRow_RVV; - } -#endif - - for (y = 0; y < height - 1; y += 2) { - ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width); - ABGRToYJRow(src_abgr, dst_yj, width); - ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + 
dst_stride_yj, width); - src_abgr += src_stride_abgr * 2; - dst_yj += dst_stride_yj * 2; - dst_uj += dst_stride_uj; - dst_vj += dst_stride_vj; - } - if (height & 1) { - ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); - ABGRToYJRow(src_abgr, dst_yj, width); - } - return 0; + return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kAbgrJPEGConstants, + width, height); } // Convert ABGR to J422. (JPeg full range I422). LIBYUV_API int ABGRToJ422(const uint8_t* src_abgr, int src_stride_abgr, - uint8_t* dst_yj, - int dst_stride_yj, - uint8_t* dst_uj, - int dst_stride_uj, - uint8_t* dst_vj, - int dst_stride_vj, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, int width, int height) { - int y; - void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = - ABGRToUVJRow_C; - void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = - ABGRToYJRow_C; - if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } - // Coalesce rows. 
- if (src_stride_abgr == width * 4 && dst_stride_yj == width && - dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { - width *= height; - height = 1; - src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; - } -#if defined(HAS_ABGRTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToYJRow = ABGRToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToUVJRow = ABGRToUVJRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToYJRow = ABGRToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToYJRow = ABGRToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToUVJRow = ABGRToUVJRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ABGRToUVJRow = ABGRToUVJRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ABGRToUVJRow = ABGRToUVJRow_AVX512BW; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToYJRow = ABGRToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ABGRToYJRow = ABGRToYJRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToUVJRow = ABGRToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToUVJRow = ABGRToUVJRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ABGRToUVJRow = ABGRToUVJRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ABGRToUVJRow = 
ABGRToUVJRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ABGRToUVJRow = ABGRToUVJRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - ABGRToUVJRow = ABGRToUVJRow_SVE2; - } - } -#endif -#if defined(HAS_ABGRTOUVJROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ABGRToUVJRow = ABGRToUVJRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ABGRToUVJRow = ABGRToUVJRow_SME; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ABGRToYJRow = ABGRToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_LSX; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ABGRToYJRow = ABGRToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ABGRToYJRow = ABGRToYJRow_LASX; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ABGRToYJRow = ABGRToYJRow_RVV; - } -#endif - - for (y = 0; y < height; ++y) { - ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); - ABGRToYJRow(src_abgr, dst_yj, width); - src_abgr += src_stride_abgr; - dst_yj += dst_stride_yj; - dst_uj += dst_stride_uj; - dst_vj += dst_stride_vj; - } - return 0; + return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, &kAbgrJPEGConstants, + width, height); } // Convert ABGR to J400. @@ -2941,83 +2005,8 @@ int ABGRToJ400(const uint8_t* src_abgr, int dst_stride_yj, int width, int height) { - int y; - void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = - ABGRToYJRow_C; - if (!src_abgr || !dst_yj || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } - // Coalesce rows. 
- if (src_stride_abgr == width * 4 && dst_stride_yj == width) { - width *= height; - height = 1; - src_stride_abgr = dst_stride_yj = 0; - } -#if defined(HAS_ABGRTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToYJRow = ABGRToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ABGRToYJRow = ABGRToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ABGRToYJRow = ABGRToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ABGRToYJRow = ABGRToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_NEON; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ABGRToYJRow = ABGRToYJRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ABGRToYJRow = ABGRToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_LSX; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ABGRToYJRow = ABGRToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ABGRToYJRow = ABGRToYJRow_LASX; - } - } -#endif -#if defined(HAS_ABGRTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ABGRToYJRow = ABGRToYJRow_RVV; - } -#endif - - for (y = 0; y < height; ++y) { - ABGRToYJRow(src_abgr, dst_yj, width); - src_abgr += src_stride_abgr; - dst_yj += dst_stride_yj; - } - return 0; + return ARGBToI400Matrix(src_abgr, src_stride_abgr, dst_yj, dst_stride_yj, + &kAbgrJPEGConstants, width, height); } // Convert ARGB to AR64. 
@@ -3302,10 +2291,40 @@ int RAWToNV21Matrix(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } @@ -3313,7 +2332,7 @@ int RAWToNV21Matrix(const uint8_t* src_raw, #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; } } @@ -3321,54 +2340,8 @@ int RAWToNV21Matrix(const uint8_t* src_raw, #if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if 
defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; } } #endif @@ -3504,73 +2477,7 @@ int RGB24ToNV12(const uint8_t* src_rgb24, } - -// Convert ARGB to J444. -LIBYUV_API -int ARGBToJ444(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yj, - int dst_stride_yj, - uint8_t* dst_uj, - int dst_stride_uj, - uint8_t* dst_vj, - int dst_stride_vj, - int width, - int height) { - return ARGBToI444Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, - dst_uj, dst_stride_uj, dst_vj, dst_stride_vj, - &kArgbJPEGConstants, width, height); -} - -// Convert ARGB to J422. -LIBYUV_API -int ARGBToJ422(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yj, - int dst_stride_yj, - uint8_t* dst_uj, - int dst_stride_uj, - uint8_t* dst_vj, - int dst_stride_vj, - int width, - int height) { - return ARGBToI422Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, - dst_uj, dst_stride_uj, dst_vj, dst_stride_vj, - &kArgbJPEGConstants, width, height); -} - -// Convert ARGB to J420. (JPeg full range I420). 
-LIBYUV_API -int ARGBToJ420(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yj, - int dst_stride_yj, - uint8_t* dst_uj, - int dst_stride_uj, - uint8_t* dst_vj, - int dst_stride_vj, - int width, - int height) { - return ARGBToI420Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, - dst_uj, dst_stride_uj, dst_vj, dst_stride_vj, - &kArgbJPEGConstants, width, height); -} - -// Convert ARGB to J400. -LIBYUV_API -int ARGBToJ400(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yj, - int dst_stride_yj, - int width, - int height) { - return ARGBToI400Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, - &kArgbJPEGConstants, width, height); -} - #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif - - diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 3b703920c..7c78277e6 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "libyuv/convert_from_argb.h" // For ArgbConstants #include "libyuv/planar_functions.h" #include @@ -15,12 +16,10 @@ #include "libyuv/cpu_id.h" #include "libyuv/row.h" -#include "libyuv/convert_from_argb.h" #include "libyuv/scale_row.h" // for ScaleRowDown2 #ifdef __cplusplus namespace libyuv { - extern "C" { #endif @@ -4745,8 +4744,8 @@ static int ARGBSobelize(const uint8_t* src_argb, uint8_t* dst, int width)) { int y; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) = + ARGBToYJRow_C; void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) = SobelYRow_C; void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1, @@ -4763,65 +4762,57 @@ static int ARGBSobelize(const uint8_t* src_argb, src_stride_argb = -src_stride_argb; } -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) +#if 
defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) +#if defined(HAS_ARGBTOYROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + ARGBToYJRow = ARGBToYJRow_Any_AVX512BW; if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + ARGBToYJRow = ARGBToYJRow_AVX512BW; } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) +#if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + ARGBToYJRow = ARGBToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + ARGBToYJRow = ARGBToYJRow_NEON; } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) +#if defined(HAS_ARGBTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + ARGBToYJRow = ARGBToYJRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + ARGBToYJRow = ARGBToYJRow_LSX; } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) +#if defined(HAS_ARGBTOYJROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + ARGBToYJRow = ARGBToYJRow_Any_LASX; if (IS_ALIGNED(width, 32)) 
{ - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + ARGBToYJRow = ARGBToYJRow_LASX; } } #endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) +#if defined(HAS_ARGBTOYJROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + ARGBToYJRow = ARGBToYJRow_RVV; } #endif @@ -4859,10 +4850,10 @@ static int ARGBSobelize(const uint8_t* src_argb, uint8_t* row_y2 = row_y1 + row_size; if (!rows) return 1; - ARGBToYMatrixRow(src_argb, row_y0, width, &kArgbJPEGConstants); + ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. - ARGBToYMatrixRow(src_argb, row_y1, width, &kArgbJPEGConstants); + ARGBToYJRow(src_argb, row_y1, width); row_y1[-1] = row_y1[0]; memset(row_y1 + width, row_y1[width - 1], 16); memset(row_y2 + width, 0, 16); @@ -4872,7 +4863,7 @@ static int ARGBSobelize(const uint8_t* src_argb, if (y < (height - 1)) { src_argb += src_stride_argb; } - ARGBToYMatrixRow(src_argb, row_y2, width, &kArgbJPEGConstants); + ARGBToYJRow(src_argb, row_y2, width); row_y2[-1] = row_y2[0]; row_y2[width] = row_y2[width - 1]; diff --git a/source/row_common.cc b/source/row_common.cc index b2a0ec12b..50795cf91 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -753,28 +753,31 @@ MAKEROWYJ(ABGR, 0, 1, 2, 4) MAKEROWYJ(RGBA, 3, 2, 1, 4) #undef MAKEROWYJ -static __inline uint8_t RGBToYMatrix(uint8_t r, - uint8_t g, - uint8_t b, +static __inline uint8_t RGBToYMatrix(uint8_t b0, + uint8_t b1, + uint8_t b2, + uint8_t b3, const struct ArgbConstants* c) { - return (c->kRGBToY[2] * r + c->kRGBToY[1] * g + c->kRGBToY[0] * b + - c->kAddY[0]) >> + return (c->kRGBToY[0] * b0 + c->kRGBToY[1] * b1 + c->kRGBToY[2] * b2 + + c->kRGBToY[3] * b3 + c->kAddY[0]) >> 8; } -static __inline uint8_t RGBToUMatrix(uint8_t r, - uint8_t g, - uint8_t b, +static __inline uint8_t RGBToUMatrix(uint8_t b0, + uint8_t b1, + uint8_t b2, + uint8_t b3, const struct ArgbConstants* c) { - return (c->kAddUV[0] - - 
(c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b)) >> + return (c->kAddUV[0] - (c->kRGBToU[0] * b0 + c->kRGBToU[1] * b1 + + c->kRGBToU[2] * b2 + c->kRGBToU[3] * b3)) >> 8; } -static __inline uint8_t RGBToVMatrix(uint8_t r, - uint8_t g, - uint8_t b, +static __inline uint8_t RGBToVMatrix(uint8_t b0, + uint8_t b1, + uint8_t b2, + uint8_t b3, const struct ArgbConstants* c) { - return (c->kAddUV[0] - - (c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b)) >> + return (c->kAddUV[0] - (c->kRGBToV[0] * b0 + c->kRGBToV[1] * b1 + + c->kRGBToV[2] * b2 + c->kRGBToV[3] * b3)) >> 8; } @@ -784,7 +787,7 @@ void ARGBToYMatrixRow_C(const uint8_t* src_argb, const struct ArgbConstants* c) { int x; for (x = 0; x < width; ++x) { - dst_y[0] = RGBToYMatrix(src_argb[2], src_argb[1], src_argb[0], c); + dst_y[0] = RGBToYMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c); src_argb += 4; dst_y += 1; } @@ -799,25 +802,28 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb, const uint8_t* src_argb1 = src_argb + src_stride_argb; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t ab = + uint8_t b0 = (src_argb[0] + src_argb[4] + src_argb1[0] + src_argb1[4] + 2) >> 2; - uint8_t ag = + uint8_t b1 = (src_argb[1] + src_argb[5] + src_argb1[1] + src_argb1[5] + 2) >> 2; - uint8_t ar = + uint8_t b2 = (src_argb[2] + src_argb[6] + src_argb1[2] + src_argb1[6] + 2) >> 2; - dst_u[0] = RGBToUMatrix(ar, ag, ab, c); - dst_v[0] = RGBToVMatrix(ar, ag, ab, c); + uint8_t b3 = + (src_argb[3] + src_argb[7] + src_argb1[3] + src_argb1[7] + 2) >> 2; + dst_u[0] = RGBToUMatrix(b0, b1, b2, b3, c); + dst_v[0] = RGBToVMatrix(b0, b1, b2, b3, c); src_argb += 8; src_argb1 += 8; dst_u += 1; dst_v += 1; } if (width & 1) { - uint8_t ab = (src_argb[0] + src_argb1[0] + 1) >> 1; - uint8_t ag = (src_argb[1] + src_argb1[1] + 1) >> 1; - uint8_t ar = (src_argb[2] + src_argb1[2] + 1) >> 1; - dst_u[0] = RGBToUMatrix(ar, ag, ab, c); - dst_v[0] = RGBToVMatrix(ar, ag, ab, c); + uint8_t b0 = (src_argb[0] + 
src_argb1[0] + 1) >> 1; + uint8_t b1 = (src_argb[1] + src_argb1[1] + 1) >> 1; + uint8_t b2 = (src_argb[2] + src_argb1[2] + 1) >> 1; + uint8_t b3 = (src_argb[3] + src_argb1[3] + 1) >> 1; + dst_u[0] = RGBToUMatrix(b0, b1, b2, b3, c); + dst_v[0] = RGBToVMatrix(b0, b1, b2, b3, c); } } @@ -828,11 +834,10 @@ void ARGBToUV444MatrixRow_C(const uint8_t* src_argb, const struct ArgbConstants* c) { int x; for (x = 0; x < width; ++x) { - uint8_t ab = src_argb[0]; - uint8_t ag = src_argb[1]; - uint8_t ar = src_argb[2]; - dst_u[0] = RGBToUMatrix(ar, ag, ab, c); - dst_v[0] = RGBToVMatrix(ar, ag, ab, c); + dst_u[0] = + RGBToUMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c); + dst_v[0] = + RGBToVMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c); src_argb += 4; dst_u += 1; dst_v += 1; @@ -1513,16 +1518,16 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB); #define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \ - const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \ + extern const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \ ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \ -(RV), 0, AY, AUV); \ - const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \ + extern const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \ ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \ -(BV), 0, AY, AUV); \ - const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \ + extern const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \ ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \ -(GV), -(RV), AY, AUV); \ - const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \ + extern const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \ ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \ -(GV), -(BV), AY, AUV); diff --git 
a/source/row_neon.cc b/source/row_neon.cc index 257398bbe..62644a321 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1848,32 +1848,41 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { asm volatile( - "vld1.8 {d16}, [%4] \n" // load kRGBToU - "vld1.8 {d17}, [%5] \n" // load kRGBToV - "vld1.16 {d18[0]}, [%6] \n" // load kAddUV[0] - "vabs.s8 d16, d16 \n" // BU, GU, RU - "vabs.s8 d17, d17 \n" // BV, GV, RV - "vdup.8 d20, d16[0] \n" // BU - "vdup.8 d21, d16[1] \n" // GU - "vdup.8 d22, d16[2] \n" // RU - "vdup.8 d23, d17[0] \n" // BV - "vdup.8 d24, d17[1] \n" // GV - "vdup.8 d25, d17[2] \n" // RV - "vdup.16 q15, d18[0] \n" // kAddUV - + "vld1.8 {d24}, [%4] \n" // load kRGBToU + "vld1.8 {d25}, [%5] \n" // load kRGBToV + "vld1.16 {d26[0]}, [%6] \n" // load kAddUV[0] + "vmovl.s8 q10, d24 \n" // U coeffs (8 shorts) + "vmovl.s8 q11, d25 \n" // V coeffs (8 shorts) + "vdup.16 q6, d26[0] \n" // bias "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %3, %3, #8 \n" // 8 processed per loop. 
- "vmull.u8 q2, d0, d20 \n" // B * BU - "vmlsl.u8 q2, d1, d21 \n" // - G * GU - "vmlsl.u8 q2, d2, d22 \n" // - R * RU - "vmull.u8 q3, d2, d25 \n" // R * RV - "vmlsl.u8 q3, d1, d24 \n" // - G * GV - "vmlsl.u8 q3, d0, d23 \n" // - B * BV + "vmovl.u8 q4, d0 \n" // B + "vmovl.u8 q5, d1 \n" // G + "vmovl.u8 q7, d2 \n" // R + "vmovl.u8 q8, d3 \n" // A - "vaddhn.u16 d0, q2, q15 \n" // signed -> unsigned - "vaddhn.u16 d1, q3, q15 \n" + "vdup.16 q12, d20[0] \n" + "vmul.s16 q2, q4, q12 \n" // U = B * U0 + "vdup.16 q12, d20[1] \n" + "vmla.s16 q2, q5, q12 \n" // U += G * U1 + "vdup.16 q12, d20[2] \n" + "vmla.s16 q2, q7, q12 \n" // U += R * U2 + "vdup.16 q12, d20[3] \n" + "vmla.s16 q2, q8, q12 \n" // U += A * U3 + + "vdup.16 q12, d22[0] \n" + "vmul.s16 q3, q4, q12 \n" // V = B * V0 + "vdup.16 q12, d22[1] \n" + "vmla.s16 q3, q5, q12 \n" // V += G * V1 + "vdup.16 q12, d22[2] \n" + "vmla.s16 q3, q7, q12 \n" // V += R * V2 + "vdup.16 q12, d22[3] \n" + "vmla.s16 q3, q8, q12 \n" // V += A * V3 + + "vsubhn.s16 d0, q6, q2 \n" // 128.0 - U + "vsubhn.s16 d1, q6, q3 \n" // 128.0 - V "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
@@ -1885,8 +1894,8 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, : "r"(&c->kRGBToU), // %4 "r"(&c->kRGBToV), // %5 "r"(&c->kAddUV) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", - "q12", "q13", "q14", "q15"); + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", + "q10", "q11", "q12", "q13"); // q13 covers d26 (kAddUV load) } void ARGBToUV444Row_NEON(const uint8_t* src_argb, @@ -1926,16 +1935,11 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, const struct ArgbConstants* c) { const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( - "vld1.8 {d18}, [%5] \n" // load kRGBToU - "vld1.8 {d19}, [%6] \n" // load kRGBToV - "vmovl.s8 q8, d18 \n" // U coeffs in q8 (d16, d17) - "vmovl.s8 q9, d19 \n" // V coeffs in q9 (d18, d19) - "vdup.16 q10, d16[0] \n" // U0 - "vdup.16 q11, d16[1] \n" // U1 - "vdup.16 q12, d16[2] \n" // U2 - "vdup.16 q13, d18[0] \n" // V0 - "vdup.16 q14, d18[1] \n" // V1 - "vdup.16 q15, d18[2] \n" // V2 + "vld1.8 {d24}, [%5] \n" // load kRGBToU (8 bytes, only 4 used) + "vld1.8 {d25}, [%6] \n" // load kRGBToV + "vmovl.s8 q14, d24 \n" // U coeffs in d28 + "vmovl.s8 q15, d25 \n" // V coeffs in d30 + "vmov.u16 q11, #0x8000 \n" // 128.0 bias "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. @@ -1944,28 +1948,39 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. - "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. + "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! 
\n" + "vpadal.u8 q0, q4 \n" // B + "vpadal.u8 q1, q5 \n" // G + "vpadal.u8 q2, q6 \n" // R + "vpadal.u8 q3, q7 \n" // A "vrshr.u16 q0, q0, #2 \n" // average of 4 "vrshr.u16 q1, q1, #2 \n" "vrshr.u16 q2, q2, #2 \n" + "vrshr.u16 q3, q3, #2 \n" - "vmov.u16 q3, #0x8000 \n" // 128.0 - - "vmul.s16 q8, q0, q10 \n" // U = B * U0 - "vmla.s16 q8, q1, q11 \n" // U += G * U1 + "vdup.16 q12, d28[0] \n" + "vmul.s16 q8, q0, q12 \n" // U = B * U0 + "vdup.16 q12, d28[1] \n" + "vmla.s16 q8, q1, q12 \n" // U += G * U1 + "vdup.16 q12, d28[2] \n" "vmla.s16 q8, q2, q12 \n" // U += R * U2 + "vdup.16 q12, d28[3] \n" + "vmla.s16 q8, q3, q12 \n" // U += A * U3 - "vmul.s16 q9, q0, q13 \n" // V = B * V0 - "vmla.s16 q9, q1, q14 \n" // V += G * V1 - "vmla.s16 q9, q2, q15 \n" // V += R * V2 + "vdup.16 q12, d30[0] \n" + "vmul.s16 q9, q0, q12 \n" // V = B * V0 + "vdup.16 q12, d30[1] \n" + "vmla.s16 q9, q1, q12 \n" // V += G * V1 + "vdup.16 q12, d30[2] \n" + "vmla.s16 q9, q2, q12 \n" // V += R * V2 + "vdup.16 q12, d30[3] \n" + "vmla.s16 q9, q3, q12 \n" // V += A * V3 - "vsubhn.s16 d0, q3, q8 \n" // 128.0 - U - "vsubhn.s16 d1, q3, q9 \n" // 128.0 - V + "vsubhn.s16 d0, q11, q8 \n" // 128.0 - U + "vsubhn.s16 d1, q11, q9 \n" // 128.0 - V "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
@@ -1978,7 +1993,7 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, : "r"(&c->kRGBToU), // %5 "r"(&c->kRGBToV) // %6 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + "q8", "q9", "q11", "q12", "q14", "q15" ); } @@ -2212,44 +2227,8 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra, uint8_t* dst_u, uint8_t* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_bgra - "vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient - "vmov.s16 q11, #74 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8000 \n" // 128.0 - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. - "subs %4, %4, #16 \n" // 16 processed per loop. - "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. - "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q1, q1, #2 \n" // average of 4 - "vrshr.u16 q2, q2, #2 \n" - "vrshr.u16 q3, q3, #2 \n" - - RGBTOUV(q3, q2, q1) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_bgra), // %0 - "+r"(src_stride_bgra), // %1 - "+r"(dst_u), // %2- - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width, + &kBgraI601Constants); } void ABGRToUVRow_NEON(const uint8_t* src_abgr, @@ -2257,44 +2236,8 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_abgr - "vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient - "vmov.s16 q11, #74 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8000 \n" // 128.0 - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. - "subs %4, %4, #16 \n" // 16 processed per loop. - "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. - "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #2 \n" // average of 4 - "vrshr.u16 q1, q1, #2 \n" - "vrshr.u16 q2, q2, #2 \n" - - RGBTOUV(q2, q1, q0) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(src_stride_abgr), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width, + &kAbgrI601Constants); } void RGBAToUVRow_NEON(const uint8_t* src_rgba, @@ -2302,44 +2245,8 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_rgba - "vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient - "vmov.s16 q11, #74 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8000 \n" // 128.0 - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. - "subs %4, %4, #16 \n" // 16 processed per loop. - "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. - "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #2 \n" // average of 4 - "vrshr.u16 q1, q1, #2 \n" - "vrshr.u16 q2, q2, #2 \n" - - RGBTOUV(q0, q1, q2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(src_stride_rgba), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width, + &kRgbaI601Constants); } void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, @@ -2801,15 +2708,16 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { asm volatile( - "vld1.8 {d16}, [%3] \n" // load kRGBToY - "vld1.16 {d18[0]}, [%4] \n" // load kAddY[0] - "vdup.8 d20, d16[0] \n" // BY - "vdup.8 d21, d16[1] \n" // GY - "vdup.8 d22, d16[2] \n" // RY - "vdup.16 q12, d18[0] \n" // AY + "vld1.8 {d24}, [%3] \n" // load kRGBToY + "vld1.16 {d25[0]}, [%4] \n" // load kAddY[0] + "vdup.8 d20, d24[0] \n" // B + "vdup.8 d21, d24[1] \n" // G + "vdup.8 d22, d24[2] \n" // R + "vdup.8 d23, d24[3] \n" // A + "vdup.16 q12, d25[0] \n" // bias "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 pixels "subs %1, %1, #16 \n" // 16 processed per loop. "vmull.u8 q8, d0, d20 \n" // B "vmull.u8 q9, d1, d20 \n" @@ -2817,6 +2725,8 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, "vmlal.u8 q9, d3, d21 \n" "vmlal.u8 q8, d4, d22 \n" // R "vmlal.u8 q9, d5, d22 \n" + "vmlal.u8 q8, d6, d23 \n" // A + "vmlal.u8 q9, d7, d23 \n" "vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y "vaddhn.u16 d1, q9, q12 \n" "vst1.8 {d0, d1}, [%2]! \n" // store 16 pixels Y. 
@@ -2826,8 +2736,8 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, "+r"(dst_y) // %2 : "r"(&c->kRGBToY), // %3 "r"(&c->kAddY) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22", - "q12"); + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", + "d24", "d25"); } void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { @@ -2846,52 +2756,20 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kAbgrJPEGConstants); } -// RGBA expects first value to be A and ignored, then 3 values to contain RGB. -// Same code as ARGB, except the LD4 -static void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - asm volatile( - "vld1.8 {d16}, [%3] \n" // load kRGBToY - "vld1.16 {d18[0]}, [%4] \n" // load kAddY[0] - "vdup.8 d20, d16[0] \n" // BY - "vdup.8 d21, d16[1] \n" // GY - "vdup.8 d22, d16[2] \n" // RY - "vdup.16 q12, d18[0] \n" // AY - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of RGBA - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" - "subs %2, %2, #16 \n" // 16 processed per loop. - "vmull.u8 q8, d2, d20 \n" // B - "vmull.u8 q9, d3, d20 \n" - "vmlal.u8 q8, d4, d21 \n" // G - "vmlal.u8 q9, d5, d21 \n" - "vmlal.u8 q8, d6, d22 \n" // R - "vmlal.u8 q9, d7, d22 \n" - "vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y - "vaddhn.u16 d1, q9, q12 \n" - "vst1.8 {d0, d1}, [%1]! \n" // store 16 pixels Y. 
- "bgt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "r"(&c->kRGBToY), // %3 - "r"(&c->kAddY) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22", - "q12"); -} - void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - RGBAToYMatrixRow_NEON(src_rgba, dst_y, width, &kArgbI601Constants); + ARGBToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgbaI601Constants); } void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - RGBAToYMatrixRow_NEON(src_rgba, dst_yj, width, &kArgbJPEGConstants); + ARGBToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgbaJPEGConstants); } void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - RGBAToYMatrixRow_NEON(src_bgra, dst_y, width, &kAbgrI601Constants); + ARGBToYMatrixRow_NEON(src_bgra, dst_y, width, &kBgraI601Constants); +} + +void BGRAToYJRow_NEON(const uint8_t* src_bgra, uint8_t* dst_yj, int width) { + ARGBToYMatrixRow_NEON(src_bgra, dst_yj, width, &kBgraJPEGConstants); } void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, @@ -2899,12 +2777,12 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, int width, const struct ArgbConstants* c) { asm volatile( - "vld1.8 {d16}, [%3] \n" // load kRGBToY - "vld1.16 {d18[0]}, [%4] \n" // load kAddY[0] - "vdup.8 d20, d16[0] \n" // BY - "vdup.8 d21, d16[1] \n" // GY - "vdup.8 d22, d16[2] \n" // RY - "vdup.16 q12, d18[0] \n" // AY + "vld1.8 {d24}, [%3] \n" // load kRGBToY + "vld1.16 {d25[0]}, [%4] \n" // load kAddY[0] + "vdup.8 d20, d24[0] \n" // BY + "vdup.8 d21, d24[1] \n" // GY + "vdup.8 d22, d24[2] \n" // RY + "vdup.16 q12, d25[0] \n" // AY "1: \n" "vld3.8 {d2, d4, d6}, [%0]! \n" // load 16 pixels of // RGB24. 
@@ -2925,8 +2803,8 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, "+r"(width) // %2 : "r"(&c->kRGBToY), // %3 "r"(&c->kAddY) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22", - "q12"); + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", + "d24", "d25"); } diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 19016cc3b..4eed2df12 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2736,47 +2736,61 @@ struct RgbUVConstants { }; // 8x1 pixels. -static void ARGBToUV444MatrixRow_NEON( - const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct RgbUVConstants* rgbuvconstants) { +void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c) { asm volatile( - "ldr d0, [%4] \n" // load rgbuvconstants - "dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient - "dup v25.16b, v0.b[1] \n" // UG -0.5781 coefficient - "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient - "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient - "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient - "neg v24.16b, v24.16b \n" - "movi v29.8h, #0x80, lsl #8 \n" // 128.0 - + "ldr q16, [%[c], #16] \n" // kRGBToU + "ldr q17, [%[c], #32] \n" // kRGBToV + "ldr s0, [%[c], #64] \n" // kAddUV + "sxtl v16.8h, v16.8b \n" // sign extend U coeffs to 16-bit + "sxtl v17.8h, v17.8b \n" // sign extend V coeffs to 16-bit + "dup v20.8h, v16.h[0] \n" // U0 + "dup v21.8h, v16.h[1] \n" // U1 + "dup v22.8h, v16.h[2] \n" // U2 + "dup v23.8h, v16.h[3] \n" // U3 + "dup v24.8h, v17.h[0] \n" // V0 + "dup v26.8h, v17.h[1] \n" // V1 + "dup v27.8h, v17.h[2] \n" // V2 + "dup v28.8h, v17.h[3] \n" // V3 + "dup v25.8h, v0.h[0] \n" // kAddUV "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w3, %w3, #8 \n" // 8 processed per loop. 
- "umull v4.8h, v0.8b, v24.8b \n" // B - "umlsl v4.8h, v1.8b, v25.8b \n" // G - "umlsl v4.8h, v2.8b, v26.8b \n" // R - "prfm pldl1keep, [%0, 448] \n" - "umull v3.8h, v2.8b, v24.8b \n" // R - "umlsl v3.8h, v1.8b, v28.8b \n" // G - "umlsl v3.8h, v0.8b, v27.8b \n" // B + "uxtl v4.8h, v0.8b \n" + "uxtl v5.8h, v1.8b \n" + "uxtl v6.8h, v2.8b \n" + "uxtl v7.8h, v3.8b \n" - "addhn v0.8b, v4.8h, v29.8h \n" // signed -> unsigned - "addhn v1.8b, v3.8h, v29.8h \n" + // U = B*U0 + G*U1 + R*U2 + A*U3 + "mul v18.8h, v4.8h, v20.8h \n" + "mla v18.8h, v5.8h, v21.8h \n" + "mla v18.8h, v6.8h, v22.8h \n" + "mla v18.8h, v7.8h, v23.8h \n" - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. - "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. + // V = B*V0 + G*V1 + R*V2 + A*V3 + "mul v19.8h, v4.8h, v24.8h \n" + "mla v19.8h, v5.8h, v26.8h \n" + "mla v19.8h, v6.8h, v27.8h \n" + "mla v19.8h, v7.8h, v28.8h \n" + + "subhn v0.8b, v25.8h, v18.8h \n" + "subhn v1.8b, v25.8h, v19.8h \n" + + "st1 {v0.8b}, [%1], #8 \n" + "st1 {v1.8b}, [%2], #8 \n" "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"(rgbuvconstants) // %4 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26", - "v27", "v28", "v29"); + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : [c] "r"(c) // %4 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28"); } static void ARGBToUV444MatrixRow_NEON_I8MM( @@ -2784,10 +2798,12 @@ static void ARGBToUV444MatrixRow_NEON_I8MM( uint8_t* dst_u, uint8_t* dst_v, int width, - const struct RgbUVConstants* rgbuvconstants) { + const struct ArgbConstants* c) { asm volatile( - "ld2r {v16.4s, v17.4s}, [%[rgbuvconstants]] \n" - "movi v29.8h, #0x80, lsl #8 \n" // 128.0 + "ldr q16, [%[c], #16] \n" // kRGBToU + "ldr q17, [%[c], #32] \n" // kRGBToV + "ldr s0, [%[c], #64] \n" // kAddUV + "dup v29.8h, 
v0.h[0] \n" // 128.0 "1: \n" "ldp q0, q1, [%[src]], #32 \n" "subs %w[width], %w[width], #8 \n" // 8 processed per loop. @@ -2807,11 +2823,11 @@ static void ARGBToUV444MatrixRow_NEON_I8MM( "str d0, [%[dst_u]], #8 \n" // store 8 pixels U. "str d1, [%[dst_v]], #8 \n" // store 8 pixels V. "b.gt 1b \n" - : [src] "+r"(src_argb), // %[src] - [dst_u] "+r"(dst_u), // %[dst_u] - [dst_v] "+r"(dst_v), // %[dst_v] - [width] "+r"(width) // %[width] - : [rgbuvconstants] "r"(rgbuvconstants) // %[rgbuvconstants] + : [src] "+r"(src_argb), // %[src] + [dst_u] "+r"(dst_u), // %[dst_u] + [dst_v] "+r"(dst_v), // %[dst_v] + [width] "+r"(width) // %[width] + : [c] "r"(c) // %[c] : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v29"); } @@ -2824,15 +2840,12 @@ static void ARGBToUV444MatrixRow_NEON_I8MM( // VG -0.7344 coefficient = -94 // VR 0.875 coefficient = 112 -static const struct RgbUVConstants kARGBI601UVConstants = {{-112, 74, 38, 0}, - {18, 94, -112, 0}}; - void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, - &kARGBI601UVConstants); + &kArgbI601Constants); } void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb, @@ -2840,26 +2853,15 @@ void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width, - &kARGBI601UVConstants); + &kArgbI601Constants); } -// RGB to JPEG coefficients -// UB 0.500 coefficient = 128 -// UG -0.33126 coefficient = -85 -// UR -0.16874 coefficient = -43 -// VB -0.08131 coefficient = -21 -// VG -0.41869 coefficient = -107 -// VR 0.500 coefficient = 128 - -static const struct RgbUVConstants kARGBJPEGUVConstants = {{-128, 85, 43, 0}, - {21, 107, -128, 0}}; - void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, - &kARGBJPEGUVConstants); + &kArgbJPEGConstants); } void 
ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb, @@ -2867,7 +2869,7 @@ void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width, - &kARGBJPEGUVConstants); + &kArgbJPEGConstants); } #define RGBTOUV_SETUP_REG \ @@ -2906,12 +2908,14 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, "ldr q17, [%[c], #32] \n" // kRGBToV "sxtl v16.8h, v16.8b \n" // sign extend U coeffs to 16-bit "sxtl v17.8h, v17.8b \n" // sign extend V coeffs to 16-bit - "dup v20.8h, v16.h[0] \n" // U0 (-BU) - "dup v21.8h, v16.h[1] \n" // U1 (-GU) - "dup v22.8h, v16.h[2] \n" // U2 (-RU) - "dup v23.8h, v17.h[0] \n" // V0 (-BV) - "dup v24.8h, v17.h[1] \n" // V1 (-GV) - "dup v26.8h, v17.h[2] \n" // V2 (-RV) + "dup v20.8h, v16.h[0] \n" // U0 + "dup v21.8h, v16.h[1] \n" // U1 + "dup v22.8h, v16.h[2] \n" // U2 + "dup v23.8h, v16.h[3] \n" // U3 + "dup v24.8h, v17.h[0] \n" // V0 + "dup v26.8h, v17.h[1] \n" // V1 + "dup v27.8h, v17.h[2] \n" // V2 + "dup v28.8h, v17.h[3] \n" // V3 "movi v25.8h, #0x80, lsl #8 \n" // 128.0 in 16-bit (0x8000) "1: \n" @@ -2921,26 +2925,31 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, "prfm pldl1keep, [%0, 448] \n" "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. + "uaddlp v18.8h, v3.16b \n" // A 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "prfm pldl1keep, [%1, 448] \n" "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. + "uadalp v18.8h, v7.16b \n" // A 16 bytes -> 8 shorts. 
"urshr v0.8h, v0.8h, #2 \n" // average of 4 "urshr v1.8h, v1.8h, #2 \n" "urshr v2.8h, v2.8h, #2 \n" + "urshr v18.8h, v18.8h, #2 \n" - // U = B*U0 + G*U1 + R*U2 + // U = B*U0 + G*U1 + R*U2 + A*U3 "mul v3.8h, v0.8h, v20.8h \n" "mla v3.8h, v1.8h, v21.8h \n" "mla v3.8h, v2.8h, v22.8h \n" + "mla v3.8h, v18.8h, v23.8h \n" - // V = B*V0 + G*V1 + R*V2 - "mul v4.8h, v0.8h, v23.8h \n" - "mla v4.8h, v1.8h, v24.8h \n" - "mla v4.8h, v2.8h, v26.8h \n" + // V = B*V0 + G*V1 + R*V2 + A*V3 + "mul v4.8h, v0.8h, v24.8h \n" + "mla v4.8h, v1.8h, v26.8h \n" + "mla v4.8h, v2.8h, v27.8h \n" + "mla v4.8h, v18.8h, v28.8h \n" // U = (128.0 - U) >> 8, V = (128.0 - V) >> 8 "subhn v0.8b, v25.8h, v3.8h \n" @@ -2956,7 +2965,8 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, "+r"(width) // %4 : [c] "r"(c) // %5 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v16", "v17", "v20", "v21", "v22", "v23", "v24", "v25", "v26" + "v16", "v17", "v18", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27", "v28" ); } @@ -2974,44 +2984,35 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - const uint8_t* src_argb_1 = src_argb + src_stride_argb; - asm volatile ( - "movi v20.8h, #128 \n" // UB/VR coeff (0.500) - "movi v21.8h, #85 \n" // UG coeff (-0.33126) - "movi v22.8h, #43 \n" // UR coeff (-0.16874) - "movi v23.8h, #21 \n" // VB coeff (-0.08131) - "movi v24.8h, #107 \n" // VG coeff (-0.41869) - "movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit) - "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. - "prfm pldl1keep, [%0, 448] \n" - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 - "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. 
- "prfm pldl1keep, [%1, 448] \n" - "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. - "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. + ARGBToUVMatrixRow_NEON(src_argb, src_stride_argb, dst_u, dst_v, width, + &kArgbJPEGConstants); +} - "urshr v0.8h, v0.8h, #2 \n" // average of 4 - "urshr v1.8h, v1.8h, #2 \n" - "urshr v2.8h, v2.8h, #2 \n" +void ABGRToUVRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width, + &kAbgrI601Constants); +} - RGBTOUV(v0.8h, v1.8h, v2.8h) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_argb_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); +void BGRAToUVRow_NEON(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width, + &kBgraI601Constants); +} + +void RGBAToUVRow_NEON(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width, + &kRgbaI601Constants); } void ABGRToUVJRow_NEON(const uint8_t* src_abgr, @@ -3019,44 +3020,8 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_uj, uint8_t* dst_vj, int width) { - const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; - asm volatile ( - "movi v20.8h, #128 \n" // UB/VR coeff (0.500) - "movi v21.8h, #85 \n" // UG coeff (-0.33126) - "movi v22.8h, #43 \n" // UR coeff (-0.16874) - "movi v23.8h, #21 \n" // VB coeff (-0.08131) - "movi v24.8h, #107 \n" // VG coeff (-0.41869) - "movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit) - "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" 
// load 16 pixels. - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts. - "prfm pldl1keep, [%0, 448] \n" - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts. - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 - "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts. - "prfm pldl1keep, [%1, 448] \n" - "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. - "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts. - - "urshr v0.8h, v0.8h, #2 \n" // average of 4 - "urshr v1.8h, v1.8h, #2 \n" - "urshr v2.8h, v2.8h, #2 \n" - - RGBTOUV(v2.8h, v1.8h, v0.8h) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(src_abgr_1), // %1 - "+r"(dst_uj), // %2 - "+r"(dst_vj), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); + ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_uj, dst_vj, width, + &kAbgrJPEGConstants); } void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24, @@ -3149,126 +3114,6 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw, ); } -void BGRAToUVRow_NEON(const uint8_t* src_bgra, - int src_stride_bgra, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra; - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts. - "prfm pldl1keep, [%0, 448] \n" - "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts. - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more - "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts. 
- "prfm pldl1keep, [%1, 448] \n" - "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts. - "uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts. - - "urshr v0.8h, v0.8h, #2 \n" // average of 4 - "urshr v1.8h, v3.8h, #2 \n" - "urshr v2.8h, v2.8h, #2 \n" - - RGBTOUV(v0.8h, v1.8h, v2.8h) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_bgra), // %0 - "+r"(src_bgra_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); -} - -void ABGRToUVRow_NEON(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts. - "prfm pldl1keep, [%0, 448] \n" - "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts. - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. - "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts. - "prfm pldl1keep, [%1, 448] \n" - "uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts. - "uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts. - - "urshr v0.8h, v3.8h, #2 \n" // average of 4 - "urshr v2.8h, v2.8h, #2 \n" - "urshr v1.8h, v1.8h, #2 \n" - - RGBTOUV(v0.8h, v2.8h, v1.8h) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
- "b.gt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(src_abgr_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); -} - -void RGBAToUVRow_NEON(const uint8_t* src_rgba, - int src_stride_rgba, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba; - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts. - "prfm pldl1keep, [%0, 448] \n" - "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts. - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. - "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts. - "prfm pldl1keep, [%1, 448] \n" - "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts. - "uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts. - - "urshr v0.8h, v0.8h, #2 \n" // average of 4 - "urshr v1.8h, v1.8h, #2 \n" - "urshr v2.8h, v2.8h, #2 \n" - - RGBTOUV(v0.8h, v1.8h, v2.8h) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(src_rgba_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); -} - void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, @@ -3483,18 +3328,19 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, ); } -// Process any of ARGB, ABGR, BGRA, RGBA, by adjusting the uvconstants layout. +// Process any of ARGB, ABGR, BGRA, RGBA, by adjusting the ArgbConstants layout. 
static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src, int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width, - const int8_t* uvconstants) { + const struct ArgbConstants* c) { const uint8_t* src1 = src + src_stride; asm volatile( "movi v23.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in // 16-bit) - "ld2r {v24.4s, v25.4s}, [%[uvconstants]] \n" + "ldr q24, [%[c], #16] \n" // kRGBToU + "ldr q25, [%[c], #32] \n" // kRGBToV "1: \n" "ld2 {v0.4s, v1.4s}, [%[src]], #32 \n" // load 8 pixels @@ -3547,51 +3393,19 @@ static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src, [dst_u] "+r"(dst_u), // %[dst_u] [dst_v] "+r"(dst_v), // %[dst_v] [width] "+r"(width) // %[width] - : [uvconstants] "r"(uvconstants) // %[uvconstants] + : [c] "r"(c) // %[c] : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v23", "v24", "v25"); } -// RGB to BT601 coefficients -// UB 0.875 coefficient = 112 -// UG -0.5781 coefficient = -74 -// UR -0.2969 coefficient = -38 -// VB -0.1406 coefficient = -18 -// VG -0.7344 coefficient = -94 -// VR 0.875 coefficient = 112 -// I8MM constants are stored negated such that we can store 128 in int8_t. 
- -static const int8_t kARGBToUVCoefficients[] = { - // -UB, -UG, -UR, 0, -VB, -VG, -VR, 0 - -112, 74, 38, 0, 18, 94, -112, 0, -}; - -static const int8_t kABGRToUVCoefficients[] = { - // -UR, -UG, -UB, 0, -VR, -VG, -VB, 0 - 38, 74, -112, 0, -112, 94, 18, 0, -}; - -static const int8_t kBGRAToUVCoefficients[] = { - // 0, -UR, -UG, -UB, 0, -VR, -VG, -VB - 0, 38, 74, -112, 0, -112, 94, 18, -}; - -static const int8_t kRGBAToUVCoefficients[] = { - // 0, -UB, -UG, -UR, 0, -VB, -VG, -VR - 0, -112, 74, 38, 0, 18, 94, -112, -}; - void ARGBToUVMatrixRow_NEON_I8MM(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) { - int8_t uvconstants[8] = { - (int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3], - (int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]}; ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width, - uvconstants); + c); } void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb, @@ -3600,7 +3414,7 @@ void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width, - kARGBToUVCoefficients); + &kArgbI601Constants); } void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr, @@ -3609,7 +3423,7 @@ void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, width, - kABGRToUVCoefficients); + &kAbgrI601Constants); } void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra, @@ -3618,7 +3432,7 @@ void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_NEON_I8MM_Impl(src_bgra, src_stride_bgra, dst_u, dst_v, width, - kBGRAToUVCoefficients); + &kBgraI601Constants); } void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba, @@ -3627,35 +3441,16 @@ void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba, 
uint8_t* dst_v, int width) { ARGBToUVMatrixRow_NEON_I8MM_Impl(src_rgba, src_stride_rgba, dst_u, dst_v, width, - kRGBAToUVCoefficients); + &kRgbaI601Constants); } -// RGB to JPEG coefficients -// UB 0.500 coefficient = 128 -// UG -0.33126 coefficient = -85 -// UR -0.16874 coefficient = -43 -// VB -0.08131 coefficient = -21 -// VG -0.41869 coefficient = -107 -// VR 0.500 coefficient = 128 -// I8MM constants are stored negated such that we can store 128 in int8_t. - -static const int8_t kARGBToUVJCoefficients[] = { - // -UB, -UG, -UR, 0, -VB, -VG, -VR, 0 - -128, 85, 43, 0, 21, 107, -128, 0, -}; - -static const int8_t kABGRToUVJCoefficients[] = { - // -UR, -UG, -UB, 0, -VR, -VG, -VB, 0 - 43, 85, -128, 0, -128, 107, 21, 0, -}; - void ARGBToUVJRow_NEON_I8MM(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width, - kARGBToUVJCoefficients); + &kArgbJPEGConstants); } void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr, @@ -3664,7 +3459,7 @@ void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, width, - kABGRToUVJCoefficients); + &kAbgrJPEGConstants); } void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { @@ -3771,206 +3566,145 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { asm volatile( - "ldr s0, [%3] \n" // load rgbconstants - "ldr s1, [%3, #48] \n" - "dup v6.16b, v0.b[0] \n" - "dup v7.16b, v0.b[1] \n" - "dup v16.16b, v0.b[2] \n" - "dup v17.8h, v1.h[0] \n" + "ldr s16, [%3] \n" // load 4 coeffs + "ldr s17, [%3, #48] \n" // load kAddY[0] + "dup v18.16b, v16.b[0] \n" // B + "dup v19.16b, v16.b[1] \n" // G + "dup v20.16b, v16.b[2] \n" // R + "dup v21.16b, v16.b[3] \n" // A + "dup v22.8h, v17.h[0] \n" // bias "1: \n" "ld4 {v2.16b,v3.16b,v4.16b,v5.16b}, [%0], #64 \n" // load 16 - // 
pixels. "subs %w2, %w2, #16 \n" // 16 processed per loop. - "umull v0.8h, v2.8b, v6.8b \n" // B - "umull2 v1.8h, v2.16b, v6.16b \n" + "umull v0.8h, v2.8b, v18.8b \n" // B + "umull2 v1.8h, v2.16b, v18.16b \n" "prfm pldl1keep, [%0, 448] \n" - "umlal v0.8h, v3.8b, v7.8b \n" // G - "umlal2 v1.8h, v3.16b, v7.16b \n" - "umlal v0.8h, v4.8b, v16.8b \n" // R - "umlal2 v1.8h, v4.16b, v16.16b \n" - "addhn v0.8b, v0.8h, v17.8h \n" // 16 bit to 8 bit Y - "addhn v1.8b, v1.8h, v17.8h \n" + "umlal v0.8h, v3.8b, v19.8b \n" // G + "umlal2 v1.8h, v3.16b, v19.16b \n" + "umlal v0.8h, v4.8b, v20.8b \n" // R + "umlal2 v1.8h, v4.16b, v20.16b \n" + "umlal v0.8h, v5.8b, v21.8b \n" // A + "umlal2 v1.8h, v5.16b, v21.16b \n" + "addhn v0.8b, v0.8h, v22.8h \n" // 16 bit to 8 bit Y + "addhn v1.8b, v1.8h, v22.8h \n" "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 - : "r"(c) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17"); + : "r"(c) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", + "v19", "v20", "v21", "v22"); } + void ARGBToYMatrixRow_NEON_DotProd( const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) { asm volatile( - "ldr s0, [%3] \n" // load rgbconstants - "ldr s1, [%3, #48] \n" - "dup v16.4s, v0.s[0] \n" - "dup v17.8h, v1.h[0] \n" + "ldr s16, [%3] \n" // load 4 coeffs + "ldr s17, [%3, #48] \n" // load kAddY[0] + "dup v18.4s, v16.s[0] \n" + "dup v19.8h, v17.h[0] \n" "1: \n" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%0], #64 \n" // load 16 - // pixels. "subs %w2, %w2, #16 \n" // 16 processed per loop. 
"movi v0.16b, #0 \n" "movi v1.16b, #0 \n" "movi v2.16b, #0 \n" "movi v3.16b, #0 \n" - "udot v0.4s, v4.16b, v16.16b \n" - "udot v1.4s, v5.16b, v16.16b \n" - "udot v2.4s, v6.16b, v16.16b \n" - "udot v3.4s, v7.16b, v16.16b \n" + "udot v0.4s, v4.16b, v18.16b \n" + "udot v1.4s, v5.16b, v18.16b \n" + "udot v2.4s, v6.16b, v18.16b \n" + "udot v3.4s, v7.16b, v18.16b \n" "uzp1 v0.8h, v0.8h, v1.8h \n" "uzp1 v1.8h, v2.8h, v3.8h \n" - "addhn v0.8b, v0.8h, v17.8h \n" - "addhn v1.8b, v1.8h, v17.8h \n" + "addhn v0.8b, v0.8h, v19.8h \n" + "addhn v1.8b, v1.8h, v19.8h \n" "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 - : "r"(c) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17"); + : "r"(c) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19"); } + // RGB to JPeg coefficients -// B * 0.1140 coefficient = 29 -// G * 0.5870 coefficient = 150 -// R * 0.2990 coefficient = 77 -// Add 0.5 -static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, {}, {}, {0x0080}, {}}; -static const struct ArgbConstants kRgb24JPEGDotProdConstants = {{0, 29, 150, 77}, {}, {}, {0x0080}, {}}; - -static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, {}, {}, {0x0080}, {}}; - -// RGB to BT.601 coefficients -// B * 0.1016 coefficient = 25 -// G * 0.5078 coefficient = 129 -// R * 0.2578 coefficient = 66 -// Add 16.5 = 0x1080 - -static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, {}, {}, {0x1080}, {}}; -static const struct ArgbConstants kRgb24I601DotProdConstants = {{0, 25, 129, 66}, {}, {}, {0x1080}, {}}; - -static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, {}, {}, {0x1080}, {}}; -static const struct ArgbConstants kRawI601DotProdConstants = {{0, 66, 129, 25}, {}, {}, {0x1080}, {}}; void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { - 
ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants); + ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kArgbI601Constants); } void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_argb, dst_yj, width, &kRgb24JPEGConstants); + ARGBToYMatrixRow_NEON(src_argb, dst_yj, width, &kArgbJPEGConstants); } void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants); + ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kAbgrI601Constants); } void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants); + ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kAbgrJPEGConstants); } void ARGBToYRow_NEON_DotProd(const uint8_t* src_argb, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_y, width, &kRgb24I601Constants); + ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_y, width, &kArgbI601Constants); } void ARGBToYJRow_NEON_DotProd(const uint8_t* src_argb, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_yj, width, &kRgb24JPEGConstants); + ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_yj, width, &kArgbJPEGConstants); } void ABGRToYRow_NEON_DotProd(const uint8_t* src_abgr, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_y, width, &kRawI601Constants); + ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_y, width, &kAbgrI601Constants); } void ABGRToYJRow_NEON_DotProd(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_yj, width, &kRawJPEGConstants); + ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_yj, width, &kAbgrJPEGConstants); } // RGBA expects first value to be A and ignored, then 3 values to contain RGB. 
-// Same code as ARGB, except the LD4 -static void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - asm volatile( - "ldr s0, [%3] \n" // load rgbconstants - "ldr s1, [%3, #48] \n" - "dup v6.16b, v0.b[0] \n" - "dup v7.16b, v0.b[1] \n" - "dup v16.16b, v0.b[2] \n" - "dup v17.8h, v1.h[0] \n" - "1: \n" - "ld4 {v1.16b,v2.16b,v3.16b,v4.16b}, [%0], #64 \n" // load 16 - // pixels. - "subs %w2, %w2, #16 \n" // 16 processed per loop. - "umull v0.8h, v2.8b, v6.8b \n" // B - "umull2 v1.8h, v2.16b, v6.16b \n" - "prfm pldl1keep, [%0, 448] \n" - "umlal v0.8h, v3.8b, v7.8b \n" // G - "umlal2 v1.8h, v3.16b, v7.16b \n" - "umlal v0.8h, v4.8b, v16.8b \n" // R - "umlal2 v1.8h, v4.16b, v16.16b \n" - "addhn v0.8b, v0.8h, v17.8h \n" // 16 bit to 8 bit Y - "addhn v1.8b, v1.8h, v17.8h \n" - "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. - "b.gt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "r"(c) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17"); -} void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - RGBAToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgb24I601Constants); + ARGBToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgbaI601Constants); } void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - RGBAToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgb24JPEGConstants); + ARGBToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgbaJPEGConstants); } void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - RGBAToYMatrixRow_NEON(src_bgra, dst_y, width, &kRawI601Constants); + ARGBToYMatrixRow_NEON(src_bgra, dst_y, width, &kBgraI601Constants); } void RGBAToYRow_NEON_DotProd(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - // No need for a separate implementation for RGBA inputs, just permute the - // RGB constants. 
- ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_y, width, - &kRgb24I601DotProdConstants); + ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_y, width, &kRgbaI601Constants); } void RGBAToYJRow_NEON_DotProd(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - // No need for a separate implementation for RGBA inputs, just permute the - // RGB constants. - ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_yj, width, - &kRgb24JPEGDotProdConstants); + ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_yj, width, &kRgbaJPEGConstants); } void BGRAToYRow_NEON_DotProd(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - // No need for a separate implementation for RGBA inputs, just permute the - // RGB constants. - ARGBToYMatrixRow_NEON_DotProd(src_bgra, dst_y, width, - &kRawI601DotProdConstants); + ARGBToYMatrixRow_NEON_DotProd(src_bgra, dst_y, width, &kBgraI601Constants); } void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, @@ -3978,30 +3712,32 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, int width, const struct ArgbConstants* c) { asm volatile( - "ldr d0, [%3] \n" // load rgbconstants - "dup v5.16b, v0.b[0] \n" - "dup v6.16b, v0.b[1] \n" - "dup v7.16b, v0.b[2] \n" - "dup v16.8h, v0.h[2] \n" + "ldr s16, [%3] \n" // load 4 coeffs + "ldr s17, [%3, #48] \n" // load kAddY[0] + "dup v18.16b, v16.b[0] \n" // B + "dup v19.16b, v16.b[1] \n" // G + "dup v20.16b, v16.b[2] \n" // R + "dup v21.8h, v17.h[0] \n" // bias "1: \n" "ld3 {v2.16b,v3.16b,v4.16b}, [%0], #48 \n" // load 16 pixels. "subs %w2, %w2, #16 \n" // 16 processed per loop. 
- "umull v0.8h, v2.8b, v5.8b \n" // B - "umull2 v1.8h, v2.16b, v5.16b \n" + "umull v0.8h, v2.8b, v18.8b \n" // B + "umull2 v1.8h, v2.16b, v18.16b \n" "prfm pldl1keep, [%0, 448] \n" - "umlal v0.8h, v3.8b, v6.8b \n" // G - "umlal2 v1.8h, v3.16b, v6.16b \n" - "umlal v0.8h, v4.8b, v7.8b \n" // R - "umlal2 v1.8h, v4.16b, v7.16b \n" - "addhn v0.8b, v0.8h, v16.8h \n" // 16 bit to 8 bit Y - "addhn v1.8b, v1.8h, v16.8h \n" + "umlal v0.8h, v3.8b, v19.8b \n" // G + "umlal2 v1.8h, v3.16b, v19.16b \n" + "umlal v0.8h, v4.8b, v20.8b \n" // R + "umlal2 v1.8h, v4.16b, v20.16b \n" + "addhn v0.8b, v0.8h, v21.8h \n" // 16 bit to 8 bit Y + "addhn v1.8b, v1.8h, v21.8h \n" "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. "b.gt 1b \n" : "+r"(src_rgb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(c) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v16", "v17", "v18", + "v19", "v20", "v21"); }