ARGBToNV12: use the Matrix implementation

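Refactored the Matrix functions (ARGBToI420Matrix, ARGBToI422Matrix, ARGBToI444Matrix and ARGBToNV12Matrix)
  and updated their CPU dispatch logic (adds AVX512BW dispatch for ARGBToUVMatrixRow).
ARGBToI420, ARGBToI422, ARGBToI444 and ARGBToNV12 now forward to the corresponding Matrix function
  with kArgbI601Constants instead of carrying their own per-CPU row dispatch.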

ARGBToNV12 clang
 68.05% ARGBToUVMatrixRow_AVX512BW
 21.04% ARGBToYMatrixRow_AVX512BW
  2.88% MergeUVRow_AVX512BW

ARGBToNV12 rowwin
 61.26% ARGBToUVMatrixRow_AVX2
 25.43% ARGBToYMatrixRow_AVX2
  3.09% MergeUVRow_AVX2

ARM on OnePlus 15
 42.98% libyuv::ARGBToUVMatrixRow_SVE_SC()
 38.95% ARGBToYMatrixRow_NEON_DotProd
  2.96% MergeUVRow_NEON
  0.18% ARGBToUVMatrixRow_SVE2

ARGBToI420
 72.28% ARGBToUVMatrixRow_AVX512BW
 19.04% ARGBToYMatrixRow_AVX512BW

ARGBToI422
 77.46% ARGBToUVMatrixRow_AVX512BW
 15.55% ARGBToYMatrixRow_AVX512BW

ARGBToI444
 67.03% ARGBToYMatrixRow_AVX512BW
 24.80% ARGBToUV444MatrixRow_AVX512BW

Bug: libyuv:42280902
Change-Id: I463ebcdb70cb669a1ce1a81102b8fd2fb3943bd3
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7819051
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
Frank Barchard 2026-05-05 17:57:52 -07:00 committed by libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com
parent 561a9780e2
commit 125f151316
2 changed files with 36 additions and 673 deletions

View File

@ -2007,160 +2007,9 @@ int ARGBToI420(const uint8_t* src_argb,
int dst_stride_v,
int width,
int height) {
int y;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON_DOTPROD)
if (TestCpuFlag(kCpuHasNeonDotProd)) {
ARGBToYRow = ARGBToYRow_Any_NEON_DotProd;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON_DotProd;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON_I8MM)
if (TestCpuFlag(kCpuHasNeonI8MM)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON_I8MM;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SVE2)
if (TestCpuFlag(kCpuHasSVE2)) {
ARGBToUVRow = ARGBToUVRow_Any_SVE2;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SVE2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SME)
if (TestCpuFlag(kCpuHasSME)) {
ARGBToUVRow = ARGBToUVRow_Any_SME;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SME;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVRow = ARGBToUVRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
ARGBToUVRow = ARGBToUVRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
ARGBToUVRow = ARGBToUVRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
ARGBToUVRow = ARGBToUVRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_LASX;
ARGBToUVRow = ARGBToUVRow_LASX;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
}
return 0;
return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v,
&kArgbI601Constants, width, height);
}
LIBYUV_API
@ -2290,6 +2139,14 @@ ARGBToUVMatrixRow_C;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 32)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW;
}
}
#endif
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
height == 0) {

View File

@ -32,153 +32,9 @@ int ARGBToI444(const uint8_t* src_argb,
int dst_stride_v,
int width,
int height) {
int y;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u,
uint8_t* dst_v, int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_y == width &&
dst_stride_u == width && dst_stride_v == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUV444Row = ARGBToUV444Row_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUV444Row = ARGBToUV444Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUV444Row = ARGBToUV444Row_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUV444Row = ARGBToUV444Row_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_NEON_I8MM)
if (TestCpuFlag(kCpuHasNeonI8MM)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON_I8MM;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON_I8MM;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToUV444Row = ARGBToUV444Row_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_LSX;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToUV444Row = ARGBToUV444Row_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToUV444Row = ARGBToUV444Row_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON_DOTPROD)
if (TestCpuFlag(kCpuHasNeonDotProd)) {
ARGBToYRow = ARGBToYRow_Any_NEON_DotProd;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON_DotProd;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYRow = ARGBToYRow_RVV;
}
#endif
for (y = 0; y < height; ++y) {
ARGBToUV444Row(src_argb, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
src_argb += src_stride_argb;
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
return 0;
return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v,
&kArgbI601Constants, width, height);
}
LIBYUV_API
@ -329,167 +185,9 @@ int ARGBToI422(const uint8_t* src_argb,
int dst_stride_v,
int width,
int height) {
int y;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_y == width &&
dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVRow = ARGBToUVRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON_DOTPROD)
if (TestCpuFlag(kCpuHasNeonDotProd)) {
ARGBToYRow = ARGBToYRow_Any_NEON_DotProd;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON_DotProd;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON_I8MM)
if (TestCpuFlag(kCpuHasNeonI8MM)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON_I8MM;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SVE2)
if (TestCpuFlag(kCpuHasSVE2)) {
ARGBToUVRow = ARGBToUVRow_Any_SVE2;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SVE2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SME)
if (TestCpuFlag(kCpuHasSME)) {
ARGBToUVRow = ARGBToUVRow_Any_SME;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SME;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
ARGBToUVRow = ARGBToUVRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
ARGBToUVRow = ARGBToUVRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
ARGBToUVRow = ARGBToUVRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_LASX;
ARGBToUVRow = ARGBToUVRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYRow = ARGBToYRow_RVV;
}
#endif
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
src_argb += src_stride_argb;
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
return 0;
return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v,
&kArgbI601Constants, width, height);
}
LIBYUV_API
@ -619,6 +317,14 @@ ARGBToUVMatrixRow_C;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 32)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW;
}
}
#endif
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
height == 0) {
@ -651,217 +357,9 @@ int ARGBToNV12(const uint8_t* src_argb,
int dst_stride_uv,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON_DOTPROD)
if (TestCpuFlag(kCpuHasNeonDotProd)) {
ARGBToYRow = ARGBToYRow_Any_NEON_DotProd;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON_DotProd;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON_I8MM)
if (TestCpuFlag(kCpuHasNeonI8MM)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON_I8MM;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SVE2)
if (TestCpuFlag(kCpuHasSVE2)) {
ARGBToUVRow = ARGBToUVRow_Any_SVE2;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SVE2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SME)
if (TestCpuFlag(kCpuHasSME)) {
ARGBToUVRow = ARGBToUVRow_Any_SME;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SME;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVRow = ARGBToUVRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
ARGBToUVRow = ARGBToUVRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_LASX;
ARGBToUVRow = ARGBToUVRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYRow = ARGBToYRow_RVV;
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow = MergeUVRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
MergeUVRow = MergeUVRow_Any_AVX512BW;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow = MergeUVRow_AVX512BW;
}
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_MERGEUVROW_SME)
if (TestCpuFlag(kCpuHasSME)) {
MergeUVRow = MergeUVRow_SME;
}
#endif
#if defined(HAS_MERGEUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
MergeUVRow = MergeUVRow_Any_LSX;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow = MergeUVRow_LSX;
}
}
#endif
#if defined(HAS_MERGEUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
MergeUVRow = MergeUVRow_RVV;
}
#endif
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
if (!row_u)
return 1;
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_uv += dst_stride_uv;
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
}
free_aligned_buffer_64(row_u);
}
return 0;
return ARGBToNV12Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y,
dst_uv, dst_stride_uv, &kArgbI601Constants, width,
height);
}
LIBYUV_API
@ -990,6 +488,14 @@ ARGBToUVMatrixRow_C;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 32)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW;
}
}
#endif
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;