From 81f698829b9d009156b52a6a4d739578434db672 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Mon, 20 Apr 2026 17:22:17 -0700 Subject: [PATCH] Add RGBToNV21Matrix function - implement wrappers with RAW, RGB24, NV21 and JNV21 to call it. Zen5 Was [ OK ] LibYUVConvertTest.RAWToJNV21_Opt (1146 ms) Now [ OK ] LibYUVConvertTest.RAWToJNV21_Opt (1446 ms) reason - the new code uses 1 pass for RAWToY but 2 pass for RAWToARGB,ARGBToUV. needs 1 RGBToUV Bug: libyuv:42280902 Change-Id: Ife6fbed0829484045409e6d42b85cec1d1fd6052 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7780026 Reviewed-by: richard winterton Commit-Queue: Frank Barchard --- README.chromium | 2 +- include/libyuv/convert_from_argb.h | 23 + include/libyuv/row.h | 16 + include/libyuv/version.h | 2 +- source/convert.cc | 823 ++++------------------------- source/convert_from_argb.cc | 611 +++++++++++---------- source/row_any.cc | 2 +- source/row_common.cc | 44 ++ source/row_rvv.cc | 2 +- unit_test/convert_argb_test.cc | 6 +- unit_test/convert_test.cc | 1 + 11 files changed, 514 insertions(+), 1018 deletions(-) diff --git a/README.chromium b/README.chromium index ff7425cb0..0b58b116e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1929 +Version: 1930 Revision: DEPS License: BSD-3-Clause License File: LICENSE diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index c0473fd70..88d1b5850 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -456,6 +456,29 @@ int ARGBToUYVY(const uint8_t* src_argb, int width, int height); +// RAW to NV21 with matrix. +LIBYUV_API +int RAWToNV21Matrix(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + const struct ArgbConstants* argbconstants, + int width, + int height); + +// RAW to NV21. +LIBYUV_API +int RAWToNV21(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); + // RAW to JNV21 full range NV21 LIBYUV_API int RAWToJNV21(const uint8_t* src_raw, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 426fbb951..c6d4786ae 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -2191,6 +2191,10 @@ void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width); void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGBToYMatrixRow_C(const uint8_t* src_rgb24, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c); void ARGBToYMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width, @@ -2199,6 +2203,10 @@ void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c); +void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c); void ARGBToUVMatrixRow_C(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -2223,6 +2231,14 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb, uint8_t* dst_v, int width, const struct ArgbConstants* c); + +void RGBToUVMatrixRow_C(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); + void ARGBToUV444MatrixRow_C(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index af964a89a..ed6baad3b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1929 +#define LIBYUV_VERSION 1930 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 4a13972be..78e8bf8d1 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -11,6 +11,7 @@ #include "libyuv/convert.h" #include "libyuv/basic_types.h" +#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" @@ -2205,7 +2206,14 @@ int ARGBToI420Matrix(const uint8_t* src_argb, ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; } #endif -// TODO(fbarchard): add AVX512BW +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif #if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; @@ -2923,12 +2931,89 @@ int RGBAToI420(const uint8_t* src_rgba, return 0; } -// Enabled if 1 pass is available -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_LSX) || \ - defined(HAS_RGB24TOYROW_AVX2) || \ - defined(HAS_RGB24TOYROW_RVV)) -#define HAS_RGB24TOYROW +// Convert RGB to I420 with matrix. +LIBYUV_API +int RGBToI420Matrix(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + const struct ArgbConstants* argbconstants, + int width, + int height) { + int y; + void (*RGBToUVMatrixRow)(const uint8_t* src_rgb24, int src_stride_rgb24, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = RGBToUVMatrixRow_C; + void (*RGBToYMatrixRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = RGBToYMatrixRow_C; + + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; + src_stride_rgb24 = -src_stride_rgb24; + } + +#if defined(HAS_RGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBToYMatrixRow = RGBToYMatrixRow_RVV; + } #endif +#if defined(HAS_RGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGBToYMatrixRow = RGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGBToYMatrixRow = RGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_RGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGBToYMatrixRow = RGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBToYMatrixRow = RGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_RGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGBToYMatrixRow = RGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGBToYMatrixRow = RGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_RGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGBToYMatrixRow = RGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGBToYMatrixRow = RGBToYMatrixRow_LASX; + } + } +#endif + + for (y = 0; y < height - 1; y += 2) { + RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width, argbconstants); + RGBToYMatrixRow(src_rgb24, dst_y, width, argbconstants); + RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width, argbconstants); + src_rgb24 += src_stride_rgb24 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + RGBToUVMatrixRow(src_rgb24, 0, dst_u, dst_v, width, argbconstants); + RGBToYMatrixRow(src_rgb24, dst_y, width, argbconstants); + } + return 0; +} // Convert RGB24 to I420. LIBYUV_API @@ -2942,190 +3027,10 @@ int RGB24ToI420(const uint8_t* src_rgb24, int dst_stride_v, int width, int height) { - int y; -#if defined(HAS_RGB24TOYROW) - void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8_t* dst_u, uint8_t* dst_v, int width) = - RGB24ToUVRow_C; - void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = - RGB24ToYRow_C; -#else - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RGB24ToARGBRow_C; - void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYRow_C; -#endif - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - -#if defined(HAS_RGB24TOYROW) - -#if defined(HAS_RGB24TOYROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - // TODO(fbarchard): Write an AVX2 function for RGB24ToUVRow. - RGB24ToYRow = RGB24ToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24ToYRow = RGB24ToYRow_AVX2; - } - } -#endif - -// Neon version does direct RGB24 to YUV. -#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToUVRow = RGB24ToUVRow_Any_NEON; - RGB24ToYRow = RGB24ToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToYRow = RGB24ToYRow_NEON; - RGB24ToUVRow = RGB24ToUVRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOYROW_LSX) && defined(HAS_RGB24TOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToUVRow = RGB24ToUVRow_Any_LSX; - RGB24ToYRow = RGB24ToYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToYRow = RGB24ToYRow_LSX; - RGB24ToUVRow = RGB24ToUVRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOYROW_LASX) && defined(HAS_RGB24TOUVROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToUVRow = RGB24ToUVRow_Any_LASX; - RGB24ToYRow = RGB24ToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToYRow = RGB24ToYRow_LASX; - RGB24ToUVRow = RGB24ToUVRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToYRow = RGB24ToYRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RGB24 to ARGB. -#else // HAS_RGB24TOYROW - -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYRow = ARGBToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYRow = ARGBToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVRow = ARGBToUVRow_AVX512BW; - } - } -#endif -#endif // HAS_RGB24TOYROW - - { -#if !defined(HAS_RGB24TOYROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - if (!row) - return 1; -#endif - - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB24TOYROW) - RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); - RGB24ToYRow(src_rgb24, dst_y, width); - RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); - ARGBToUVRow(row, row_size, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); -#endif - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RGB24TOYROW) - RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); - RGB24ToYRow(src_rgb24, dst_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RGB24TOYROW) - free_aligned_buffer_64(row); -#endif - } - return 0; + return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, + &kArgbI601Constants, width, height); } -#undef HAS_RGB24TOYROW - -// Enabled if 1 pass is available -#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_AVX2) || defined(HAS_RGB24TOYJROW_RVV) -#define HAS_RGB24TOYJROW -#endif // Convert RGB24 to J420. LIBYUV_API @@ -3139,179 +3044,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24, int dst_stride_v, int width, int height) { - int y; -#if defined(HAS_RGB24TOYJROW) - void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8_t* dst_u, uint8_t* dst_v, int width) = - RGB24ToUVJRow_C; - void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = - RGB24ToYJRow_C; -#else - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RGB24ToARGBRow_C; - void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYJRow_C; -#endif - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - -#if defined(HAS_RGB24TOYJROW) - -#if defined(HAS_RGB24TOYJROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB24ToYJRow = RGB24ToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24ToYJRow = RGB24ToYJRow_AVX2; - } - } -#endif - -// Neon version does direct RGB24 to YUV. -#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; - RGB24ToYJRow = RGB24ToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToYJRow = RGB24ToYJRow_NEON; - RGB24ToUVJRow = RGB24ToUVJRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToYJRow = RGB24ToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToYJRow = RGB24ToYJRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToYJRow = RGB24ToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToYJRow = RGB24ToYJRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToYJRow = RGB24ToYJRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RGB24 to ARGB. -#else // HAS_RGB24TOYJROW - -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYJRow = ARGBToYJRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; - } - } -#endif -#endif // HAS_RGB24TOYJROW - - { -#if !defined(HAS_RGB24TOYJROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - if (!row) - return 1; -#endif - - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB24TOYJROW) - RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); - RGB24ToYJRow(src_rgb24, dst_y, width); - RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); - ARGBToUVJRow(row, row_size, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); -#endif - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RGB24TOYJROW) - RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); - RGB24ToYJRow(src_rgb24, dst_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - ARGBToUVJRow(row, 0, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RGB24TOYJROW) - free_aligned_buffer_64(row); -#endif - } - return 0; + return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, + &kArgbJPEGConstants, width, height); } -#undef HAS_RGB24TOYJROW - -// Enabled if 1 pass is available -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_LSX) || \ - defined(HAS_RAWTOYROW_AVX2) || \ - defined(HAS_RAWTOYROW_RVV)) -#define HAS_RAWTOYROW -#endif // Convert RAW to I420. LIBYUV_API @@ -3325,197 +3061,10 @@ int RAWToI420(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - int y; -#if defined(HAS_RAWTOYROW) - void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, - uint8_t* dst_v, int width) = RAWToUVRow_C; - void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = - RAWToYRow_C; -#else - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; - void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYRow_C; -#endif - if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_raw = src_raw + (height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - -#if defined(HAS_RAWTOYROW) - -#if defined(HAS_RAWTOYROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - // TODO(fbarchard): Write an AVX2 function for RAWToUVRow. - RAWToYRow = RAWToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToYRow = RAWToYRow_AVX2; - } - } -#endif - -// Neon version does direct RAW to YUV. -#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToUVRow = RAWToUVRow_Any_NEON; - RAWToYRow = RAWToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToYRow = RAWToYRow_NEON; - RAWToUVRow = RAWToUVRow_NEON; - } - } -#endif -#if defined(HAS_RAWTOYROW_LSX) && defined(HAS_RAWTOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RAWToUVRow = RAWToUVRow_Any_LSX; - RAWToYRow = RAWToYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RAWToYRow = RAWToYRow_LSX; - RAWToUVRow = RAWToUVRow_LSX; - } - } -#endif -#if defined(HAS_RAWTOYROW_LASX) && defined(HAS_RAWTOUVROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RAWToUVRow = RAWToUVRow_Any_LASX; - RAWToYRow = RAWToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RAWToYRow = RAWToYRow_LASX; - RAWToUVRow = RAWToUVRow_LASX; - } - } -#endif -#if defined(HAS_RAWTOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RAWToYRow = RAWToYRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RAW to ARGB. -#else // HAS_RAWTOYROW - -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RAWToARGBRow = RAWToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToARGBRow = RAWToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYRow = ARGBToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYRow = ARGBToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVRow = ARGBToUVRow_AVX512BW; - } - } -#endif -#endif // HAS_RAWTOYROW - - { -#if !defined(HAS_RAWTOYROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - if (!row) - return 1; -#endif - - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYROW) - RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); - RAWToYRow(src_raw, dst_y, width); - RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); -#else - RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); - ARGBToUVRow(row, row_size, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + row_size, dst_y + dst_stride_y, width); -#endif - src_raw += src_stride_raw * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RAWTOYROW) - RAWToUVRow(src_raw, 0, dst_u, dst_v, width); - RAWToYRow(src_raw, dst_y, width); -#else - RAWToARGBRow(src_raw, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RAWTOYROW) - free_aligned_buffer_64(row); -#endif - } - return 0; + return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, + dst_v, dst_stride_v, dst_u, dst_stride_u, + &kAbgrI601Constants, width, height); } -#undef HAS_RAWTOYROW - -// Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_AVX2) || defined(HAS_RAWTOYJROW_RVV) -#define HAS_RAWTOYJROW -#endif // Convert RAW to J420. LIBYUV_API @@ -3529,176 +3078,10 @@ int RAWToJ420(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - int y; -#if defined(HAS_RAWTOYJROW) - void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_u, uint8_t* dst_v, int width) = - RAWToUVJRow_C; - void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = - RAWToYJRow_C; -#else - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; - void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYJRow_C; -#endif - if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_raw = src_raw + (height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - -#if defined(HAS_RAWTOYJROW) - -#if defined(HAS_RAWTOYJROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RAWToYJRow = RAWToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToYJRow = RAWToYJRow_AVX2; - } - } -#endif - -// Neon version does direct RAW to YUV. -#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToUVJRow = RAWToUVJRow_Any_NEON; - RAWToYJRow = RAWToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_NEON; - RAWToUVJRow = RAWToUVJRow_NEON; - } - } -#endif -#if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RAWToUVJRow = RAWToUVJRow_Any_LSX; - RAWToYJRow = RAWToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_LSX; - RAWToUVJRow = RAWToUVJRow_LSX; - } - } -#endif -#if defined(HAS_RAWTOYJROW_LASX) && defined(HAS_RAWTOUVJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RAWToUVJRow = RAWToUVJRow_Any_LASX; - RAWToYJRow = RAWToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RAWToYJRow = RAWToYJRow_LASX; - RAWToUVJRow = RAWToUVJRow_LASX; - } - } -#endif -#if defined(HAS_RAWTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RAWToYJRow = RAWToYJRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RAW to ARGB. -#else // HAS_RAWTOYJROW - -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RAWTOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RAWToARGBRow = RAWToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToARGBRow = RAWToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; - } - } -#endif -#endif // HAS_RAWTOYJROW - - { -#if !defined(HAS_RAWTOYJROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - if (!row) - return 1; -#endif - - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width); - RAWToYJRow(src_raw, dst_y, width); - RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); -#else - RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); - ARGBToUVJRow(row, row_size, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); -#endif - src_raw += src_stride_raw * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, 0, dst_u, dst_v, width); - RAWToYJRow(src_raw, dst_y, width); -#else - RAWToARGBRow(src_raw, row, width); - ARGBToUVJRow(row, 0, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RAWTOYJROW) - free_aligned_buffer_64(row); -#endif - } - return 0; + return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, + dst_v, dst_stride_v, dst_u, dst_stride_u, + &kAbgrJPEGConstants, width, height); } -#undef HAS_RAWTOYJROW // RAW big endian (rgb in memory) to I444 // 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444 diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c72f77e93..89be79c75 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -248,6 +248,22 @@ int ARGBToI444Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif #if defined(HAS_ARGBTOUV444MATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_NEON; @@ -511,6 +527,22 @@ int ARGBToI422Matrix(const uint8_t* src_argb, ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; } } +#endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } #endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) { @@ -779,43 +811,9 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = ARGBToUVMatrixRow_C; -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif -// TODO(fbarchard): add AVX512BW -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 || height == 0) { return -1; @@ -826,6 +824,90 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } + +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; + } +#endif + #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow = MergeUVRow_Any_SSE2; @@ -877,29 +959,31 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, } #endif - // Allocate a rows of uv. - align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); - if (!row_u) - return 1; + { + // Allocate a rows of uv. + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; - for (y = 0; y < height - 1; y += 2) { - ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, - argbconstants); - MergeUVRow(row_u, row_v, dst_uv, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, - argbconstants); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; + for (y = 0; y < height - 1; y += 2) { + ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, + argbconstants); + MergeUVRow(row_u, row_v, dst_uv, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, + argbconstants); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); + MergeUVRow(row_u, row_v, dst_uv, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + } + free_aligned_buffer_64(row_u); } - if (height & 1) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); - MergeUVRow(row_u, row_v, dst_uv, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - } - free_aligned_buffer_64(row_u); return 0; } @@ -915,13 +999,17 @@ int ARGBToNV21(const uint8_t* src_argb, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUVMatrixRow_C; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, - uint8_t* dst_vu, int width) = MergeUVRow_C; + uint8_t* dst_vu, int width) = MergeUVRow_C; + + const struct ArgbConstants* argbconstants = &kArgbI601Constants; + if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; } @@ -931,140 +1019,90 @@ int ARGBToNV21(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } + #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYRow = ARGBToYRow_Any_AVX512BW; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; if (IS_ALIGNED(width, 64)) { - ARGBToYRow = ARGBToYRow_AVX512BW; + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; } } #endif -#if defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; } } #endif -#if defined(HAS_ARGBTOUVROW_AVX512BW) +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVRow = ARGBToUVRow_AVX512BW; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; } } #endif -#if defined(HAS_ARGBTOYROW_NEON) +#if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYRow = ARGBToYRow_Any_NEON; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYRow = ARGBToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToUVRow = ARGBToUVRow_RVV; - } -#endif -#if defined(HAS_ARGBTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ARGBToUVRow = ARGBToUVRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - ARGBToUVRow = ARGBToUVRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ARGBToUVRow = ARGBToUVRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ARGBToUVRow = ARGBToUVRow_SME; + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_LSX; + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; } } #endif -#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYRow = ARGBToYRow_Any_LSX; - ARGBToUVRow = ARGBToUVRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_LSX; - ARGBToUVRow = ARGBToUVRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) +#if defined(HAS_ARGBTOYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYRow = ARGBToYRow_Any_LASX; - ARGBToUVRow = ARGBToUVRow_Any_LASX; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_LASX; - ARGBToUVRow = ARGBToUVRow_LASX; + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; } } #endif -#if defined(HAS_ARGBTOYROW_RVV) +#if defined(HAS_ARGBTOYMATRIXROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYRow = ARGBToYRow_RVV; + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; + } +#endif + #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow = MergeUVRow_Any_SSE2; @@ -1084,7 +1122,7 @@ int ARGBToNV21(const uint8_t* src_argb, #if defined(HAS_MERGEUVROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 64)) { + if (IS_ALIGNED(halfwidth, 32)) { MergeUVRow = MergeUVRow_AVX512BW; } } @@ -1115,6 +1153,7 @@ int ARGBToNV21(const uint8_t* src_argb, MergeUVRow = MergeUVRow_RVV; } #endif + { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); @@ -1123,25 +1162,25 @@ int ARGBToNV21(const uint8_t* src_argb, return 1; for (y = 0; y < height - 1; y += 2) { - ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); + ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, + argbconstants); MergeUVRow(row_v, row_u, dst_vu, halfwidth); - ARGBToYRow(src_argb, dst_y, width); - ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, + argbconstants); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_vu += dst_stride_vu; } if (height & 1) { - ARGBToUVRow(src_argb, 0, row_u, row_v, width); + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); MergeUVRow(row_v, row_u, dst_vu, halfwidth); - ARGBToYRow(src_argb, dst_y, width); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); } free_aligned_buffer_64(row_u); } return 0; } - -LIBYUV_API int ABGRToNV12(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y, @@ -3983,36 +4022,32 @@ int ARGBToAB64(const uint8_t* src_argb, #define HAS_RAWTOYJROW #endif -// RAW to JNV21 full range NV21 +// RAW to NV21 with matrix. LIBYUV_API -int RAWToJNV21(const uint8_t* src_raw, - int src_stride_raw, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { +int RAWToNV21Matrix(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + const struct ArgbConstants* argbconstants, + int width, + int height) { int y; int halfwidth = (width + 1) >> 1; -#if defined(HAS_RAWTOYJROW) - void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = - RAWToUVJRow_C; - void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = - RAWToYJRow_C; -#else void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; - void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_uj, uint8_t* dst_vj, int width) = - ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYJRow_C; -#endif - void (*MergeUVRow)(const uint8_t* src_uj, const uint8_t* src_vj, - uint8_t* dst_vu, int width) = MergeUVRow_C; - if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) { + void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUVMatrixRow_C; + void (*RGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = NULL; + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; + + if (!src_raw || !dst_y || !dst_vu || !argbconstants || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -4022,54 +4057,6 @@ int RAWToJNV21(const uint8_t* src_raw, src_stride_raw = -src_stride_raw; } -#if defined(HAS_RAWTOYJROW) - -#if defined(HAS_RAWTOYJROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - // TODO(fbarchard): Write an AVX2 function for RAWToUVJRow. - RAWToYJRow = RAWToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RAWToYJRow = RAWToYJRow_AVX2; - } - } -#endif - -// Neon version does direct RAW to YUV. -#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToUVJRow = RAWToUVJRow_Any_NEON; - RAWToYJRow = RAWToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_NEON; - RAWToUVJRow = RAWToUVJRow_NEON; - } - } -#endif -#if defined(HAS_RAWTOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RAWToYJRow = RAWToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_LSX; - } - } -#endif -#if defined(HAS_RAWTOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RAWToYJRow = RAWToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RAWToYJRow = RAWToYJRow_LASX; - } - } -#endif -#if defined(HAS_RAWTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RAWToYJRow = RAWToYJRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RAW to ARGB. -#else // HAS_RAWTOYJROW - #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -4086,47 +4073,57 @@ int RAWToJNV21(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOYJROW_SSSE3) +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) +#if defined(HAS_ARGBTOUVMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; } } #endif -#if defined(HAS_ARGBTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_AVX512BW) +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVJRow = ARGBToUVJRow_AVX512BW; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; } } #endif -#endif // HAS_RAWTOYJROW +#if defined(HAS_RGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBToYMatrixRow = RGBToYMatrixRow_RVV; + } +#endif +#if defined(HAS_RGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGBToYMatrixRow = RGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGBToYMatrixRow = RGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_RGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGBToYMatrixRow = RGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBToYMatrixRow = RGBToYMatrixRow_NEON; + } + } +#endif + #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow = MergeUVRow_Any_SSE2; @@ -4146,7 +4143,7 @@ int RAWToJNV21(const uint8_t* src_raw, #if defined(HAS_MERGEUVROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 64)) { + if (IS_ALIGNED(halfwidth, 32)) { MergeUVRow = MergeUVRow_AVX512BW; } } @@ -4177,58 +4174,90 @@ int RAWToJNV21(const uint8_t* src_raw, MergeUVRow = MergeUVRow_RVV; } #endif + { -#if defined(HAS_RAWTOYJROW) - // Allocate a row of uv. - const int row_uv_size = ((halfwidth + 31) & ~31); - align_buffer_64(row_uj, row_uv_size * 2); - uint8_t* row_vj = row_uj + row_uv_size; -#else - // Allocate row of uv and 2 rows of ARGB. + const struct ArgbConstants* uvconstants = argbconstants; + if (argbconstants == &kAbgrI601Constants) { + uvconstants = &kArgbI601Constants; + } else if (argbconstants == &kAbgrJPEGConstants) { + uvconstants = &kArgbJPEGConstants; + } + + // Allocate rows of uv and 2 rows of ARGB. const int row_size = ((width * 4 + 31) & ~31); const int row_uv_size = ((halfwidth + 31) & ~31); - align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2); - uint8_t* row_vj = row_uj + row_uv_size; - uint8_t* row = row_vj + row_uv_size; -#endif - if (!row_uj) + align_buffer_64(row_u, row_uv_size * 2 + row_size * 2); + uint8_t* row_v = row_u + row_uv_size; + uint8_t* row = row_v + row_uv_size; + if (!row_u) return 1; for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width); - MergeUVRow(row_vj, row_uj, dst_vu, halfwidth); - RAWToYJRow(src_raw, dst_y, width); - RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); -#else RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); - ARGBToUVJRow(row, row_size, row_uj, row_vj, width); - MergeUVRow(row_vj, row_uj, dst_vu, halfwidth); - ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); -#endif + ARGBToUVMatrixRow(row, row_size, row_u, row_v, width, uvconstants); + MergeUVRow(row_v, row_u, dst_vu, halfwidth); + if (RGBToYMatrixRow) { + RGBToYMatrixRow(src_raw, dst_y, width, argbconstants); + RGBToYMatrixRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width, + argbconstants); + } else { + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, + int width, const struct ArgbConstants* c) = + ARGBToYMatrixRow_C; + ARGBToYMatrixRow(row, dst_y, width, uvconstants); + ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, + uvconstants); + } src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_vu += dst_stride_vu; } if (height & 1) { -#if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, 0, row_uj, row_vj, width); - MergeUVRow(row_vj, row_uj, dst_vu, halfwidth); - RAWToYJRow(src_raw, dst_y, width); -#else RAWToARGBRow(src_raw, row, width); - ARGBToUVJRow(row, 0, row_uj, row_vj, width); - MergeUVRow(row_vj, row_uj, dst_vu, halfwidth); - ARGBToYJRow(row, dst_y, width); -#endif + ARGBToUVMatrixRow(row, 0, row_u, row_v, width, uvconstants); + MergeUVRow(row_v, row_u, dst_vu, halfwidth); + if (RGBToYMatrixRow) { + RGBToYMatrixRow(src_raw, dst_y, width, argbconstants); + } else { + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, + int width, const struct ArgbConstants* c) = + ARGBToYMatrixRow_C; + ARGBToYMatrixRow(row, dst_y, width, uvconstants); + } } - free_aligned_buffer_64(row_uj); + free_aligned_buffer_64(row_u); } return 0; } -#undef HAS_RAWTOYJROW + +// RAW to NV21. +LIBYUV_API +int RAWToNV21(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return RAWToNV21Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, &kAbgrI601Constants, width, height); +} + +// RAW to JNV21 full range NV21 +LIBYUV_API +int RAWToJNV21(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return RAWToNV21Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_vu, + dst_stride_vu, &kAbgrJPEGConstants, width, height); +} #ifdef __cplusplus } // extern "C" diff --git a/source/row_any.cc b/source/row_any.cc index fdef98599..14ad6849f 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -2321,7 +2321,7 @@ ANY12M(ARGBToUV444MatrixRow_Any_NEON, ARGBToUV444MatrixRow_NEON, 4, 7) #ifdef HAS_ARGBTOYROW_SSSE3 ANY11MC(ARGBToYMatrixRow_Any_SSSE3, ARGBToYMatrixRow_SSSE3, 4, 15) #endif -#ifdef HAS_ARGBTOYMATRIXROW_AVX2 +#ifdef HAS_ARGBTOYROW_AVX2 ANY11MC(ARGBToYMatrixRow_Any_AVX2, ARGBToYMatrixRow_AVX2, 4, 31) ANY11MC(RGBToYMatrixRow_Any_AVX2, RGBToYMatrixRow_AVX2, 3, 31) #endif diff --git a/source/row_common.cc b/source/row_common.cc index 3fac1d5f4..167a8bc3c 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -782,6 +782,18 @@ static __inline uint8_t RGBToVMatrix(uint8_t r, 8; } +void RGBToYMatrixRow_C(const uint8_t* src_rgb24, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { + int x; + for (x = 0; x < width; ++x) { + dst_y[0] = RGBToYMatrix(src_rgb24[2], src_rgb24[1], src_rgb24[0], c); + src_rgb24 += 3; + dst_y += 1; + } +} + void ARGBToYMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width, @@ -825,6 +837,38 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb, } } + +void RGBToUVMatrixRow_C(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c) { + const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24; + int x; + for (x = 0; x < width - 1; x += 2) { + uint8_t ab = + (src_rgb24[0] + src_rgb24[3] + src_rgb24_1[0] + src_rgb24_1[3] + 2) >> 2; + uint8_t ag = + (src_rgb24[1] + src_rgb24[4] + src_rgb24_1[1] + src_rgb24_1[4] + 2) >> 2; + uint8_t ar = + (src_rgb24[2] + src_rgb24[5] + src_rgb24_1[2] + src_rgb24_1[5] + 2) >> 2; + dst_u[0] = RGBToUMatrix(ar, ag, ab, c); + dst_v[0] = RGBToVMatrix(ar, ag, ab, c); + src_rgb24 += 6; + src_rgb24_1 += 6; + dst_u += 1; + dst_v += 1; + } + if (width & 1) { + uint8_t ab = (src_rgb24[0] + src_rgb24_1[0] + 1) >> 1; + uint8_t ag = (src_rgb24[1] + src_rgb24_1[1] + 1) >> 1; + uint8_t ar = (src_rgb24[2] + src_rgb24_1[2] + 1) >> 1; + dst_u[0] = RGBToUMatrix(ar, ag, ab, c); + dst_v[0] = RGBToVMatrix(ar, ag, ab, c); + } +} + void ARGBToUV444MatrixRow_C(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, diff --git a/source/row_rvv.cc b/source/row_rvv.cc index 9634d523a..c71b234c1 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -1238,7 +1238,7 @@ void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) { #endif #ifdef HAS_RGBTOYMATRIXROW_RVV -static void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, +void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, uint8_t* dst_y, int width, const struct ArgbConstants* c) { diff --git a/unit_test/convert_argb_test.cc b/unit_test/convert_argb_test.cc index 111c5916a..8281dec7d 100644 --- a/unit_test/convert_argb_test.cc +++ b/unit_test/convert_argb_test.cc @@ -2901,14 +2901,14 @@ TEST_F(LibYUVConvertTest, TestARGBToUVRow_Any) { #if defined(HAS_ARGBTOUVROW_AVX2) int has_avx2 = TestCpuFlag(kCpuHasAVX2); if (has_avx2) { - ARGBToUVRow_AVX2(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); + ARGBToUVRow_Any_AVX2(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); } else { ARGBToUVRow_C(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); } #elif defined(HAS_ARGBTOUVROW_NEON) - ARGBToUVRow_NEON(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); + ARGBToUVRow_Any_NEON(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); #elif defined(HAS_ARGBTOUVROW_RVV) - ARGBToUVRow_RVV(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); + ARGBToUVRow_Any_RVV(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); #else ARGBToUVRow_C(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth); #endif diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index f5c9c6259..d15d990dc 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -826,6 +826,7 @@ TESTATOBP(ARGB, 1, 4, NV21, 2, 2) TESTATOBP(ABGR, 1, 4, NV12, 2, 2) TESTATOBP(ABGR, 1, 4, NV21, 2, 2) TESTATOBP(RAW, 1, 3, JNV21, 2, 2) +TESTATOBP(RAW, 1, 3, NV21, 2, 2) TESTATOBP(YUY2, 2, 4, NV12, 2, 2) TESTATOBP(UYVY, 2, 4, NV12, 2, 2) TESTATOBP(AYUV, 1, 4, NV12, 2, 2)