Add ARGBToUV444MatrixRow_RVV, ARGBToUVMatrixRow_RVV, and wrappers

This change implements ARGBToUV444MatrixRow_RVV, ARGBToUVMatrixRow_RVV,
and their wrappers (ARGBToUVRow_RVV, ARGBToUVJRow_RVV, etc.) using RVV
intrinsics, mirroring the NEON/AVX2 designs. It wires them into the
build and dispatch systems.

LIBYUV_RVV_HAS_TUPLE_TYPE is always defined on new compilers (RVV
intrinsics v0.12 and later). This macro has been removed, assuming tuple
types are available everywhere, reducing the amount of code in
row_rvv.cc, scale_rvv.cc, and row.h.

Tested via: ~/bin/doyuv3v && ~/bin/runyuv3v TestARGBToI444Matrix
~/bin/doyuv3av

Bug: libyuv:42280902
Change-Id: I36d305386b297d69023c068aa9c62ab6b2ad039c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7769956
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2026-04-16 17:09:38 -07:00 committed by libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com
parent dec8272138
commit ace7c4573c
5 changed files with 357 additions and 1916 deletions

View File

@ -847,7 +847,6 @@ extern "C" {
#endif
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000
// Since v0.12, TUPLE_TYPE is introduced for segment load and store.
#define LIBYUV_RVV_HAS_TUPLE_TYPE
// Since v0.12, VXRM(fixed-point rounding mode) is included in arguments of
// fixed-point intrinsics.
#define LIBYUV_RVV_HAS_VXRM_ARG
@ -863,6 +862,12 @@ extern "C" {
#define HAS_ARGBTOAR64ROW_RVV
#define HAS_ARGBTOYJROW_RVV
#define HAS_ARGBTOYMATRIXROW_RVV
#define HAS_ARGBTOUV444MATRIXROW_RVV
#define HAS_ARGBTOUVMATRIXROW_RVV
#define HAS_ARGBTOUV444ROW_RVV
#define HAS_ARGBTOUVJ444ROW_RVV
#define HAS_ARGBTOUVROW_RVV
#define HAS_ARGBTOUVJROW_RVV
#define HAS_ARGBTOYROW_RVV
#define HAS_BGRATOYROW_RVV
#define HAS_COPYROW_RVV
@ -892,8 +897,7 @@ extern "C" {
// __riscv_vcreate_v_u8m2x3
// __riscv_vcreate_v_u8m2x4
// __riscv_vcreate_v_u8m4x2
#if !defined(LIBYUV_RVV_HAS_TUPLE_TYPE) || \
(defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VCREATE))
#ifdef LIBYUV_RVV_HAS_VCREATE
#define HAS_AB64TOARGBROW_RVV
#define HAS_AR64TOAB64ROW_RVV
#define HAS_ARGBATTENUATEROW_RVV
@ -1798,6 +1802,35 @@ void ABGRToYJRow_NEON_DotProd(const uint8_t* src_abgr,
void RGBAToYJRow_NEON_DotProd(const uint8_t* src_rgba,
uint8_t* dst_yj,
int width);
// RVV (RISC-V Vector) row functions that read ARGB and write U and V rows to
// dst_u / dst_v. The conversion functions assign these directly to their row
// function pointers when TestCpuFlag(kCpuHasRVV) is set, with no _Any_ variant
// and no width check, so they presumably accept any width - confirm in
// row_rvv.cc.

// Matrix variant: the ArgbConstants to use are supplied by the caller via c.
void ARGBToUV444MatrixRow_RVV(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
// Matrix variant that also takes src_stride_argb. NOTE(review): presumably the
// stride is used to reach a second source row for subsampled UV - confirm
// against the implementation.
void ARGBToUVMatrixRow_RVV(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
// Entry points without a constants argument. The commit description calls
// ARGBToUVRow_RVV and ARGBToUVJRow_RVV (among others) wrappers of the
// MatrixRow functions; which constants each one passes is not visible here.
void ARGBToUV444Row_RVV(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_RVV(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJ444Row_RVV(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_RVV(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
@ -2143,6 +2176,10 @@ void ARGBToYMatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c);
// RVV counterpart of ARGBToYMatrixRow_C, declared with the same parameter
// list. It is assigned directly to ARGBToYMatrixRow when
// TestCpuFlag(kCpuHasRVV) is set.
void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c);
void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,

View File

@ -2187,6 +2187,11 @@ int ARGBToI420Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
@ -2195,6 +2200,12 @@ int ARGBToI420Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
// TODO(fbarchard): add AVX512BW
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;

View File

@ -108,6 +108,11 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUV444Row = ARGBToUV444Row_RVV;
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -200,6 +205,11 @@ int ARGBToI444Matrix(const uint8_t* src_argb,
uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUV444MatrixRow_C;
#if defined(HAS_ARGBTOUV444MATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3;
@ -224,6 +234,12 @@ int ARGBToI444Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
// TODO(fbarchard): add AVX512BW
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
@ -360,6 +376,11 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVRow = ARGBToUVRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
@ -464,6 +485,11 @@ int ARGBToI422Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
@ -472,6 +498,12 @@ int ARGBToI422Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
// TODO(fbarchard): add AVX512BW
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
@ -545,6 +577,11 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVRow = ARGBToUVRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
@ -750,6 +787,11 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
@ -758,6 +800,12 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
// TODO(fbarchard): add AVX512BW
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
@ -947,6 +995,11 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVRow = ARGBToUVRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
@ -1618,6 +1671,11 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVRow = ARGBToUVRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
@ -1842,6 +1900,11 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVRow = ARGBToUVRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
@ -2814,6 +2877,11 @@ int ARGBToJ444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJ444ROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVJ444Row = ARGBToUVJ444Row_RVV;
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
@ -2930,6 +2998,11 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVJRow = ARGBToUVJRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
@ -3134,6 +3207,11 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVJRow = ARGBToUVJRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff