From ace7c4573c625af0046edf6ebb4f1956fd220bec Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Thu, 16 Apr 2026 17:09:38 -0700
Subject: [PATCH] Add ARGBToUV444MatrixRow_RVV, ARGBToUVMatrixRow_RVV, and wrappers

This change implements ARGBToUV444MatrixRow_RVV, ARGBToUVMatrixRow_RVV, and
their wrappers (ARGBToUVRow_RVV, ARGBToUVJRow_RVV, etc.) using RVV
intrinsics, mirroring the NEON/AVX2 designs, and wires them into the build
and dispatch code.

LIBYUV_RVV_HAS_TUPLE_TYPE is always true on current compilers (any with
__riscv_v_intrinsic >= 12000), so the macro is removed and tuple types are
assumed everywhere, reducing the amount of code in row_rvv.cc, scale_rvv.cc,
and row.h.

Tested via:
  ~/bin/doyuv3v && ~/bin/runyuv3v TestARGBToI444Matrix
  ~/bin/doyuv3av

Bug: libyuv:42280902
Change-Id: I36d305386b297d69023c068aa9c62ab6b2ad039c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7769956
Reviewed-by: richard winterton
Commit-Queue: Frank Barchard
---
 include/libyuv/row.h        |   43 +-
 source/convert.cc           |   11 +
 source/convert_from_argb.cc |   78 +++
 source/row_rvv.cc           | 1296 ++++++-----------------------
 source/scale_rvv.cc         |  845 +----------------------
 5 files changed, 357 insertions(+), 1916 deletions(-)

diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index b47d42eed..0ee5598cc 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -847,7 +847,6 @@ extern "C" {
 #endif
 #if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000
 // Since v0.12, TUPLE_TYPE is introduced for segment load and store.
-#define LIBYUV_RVV_HAS_TUPLE_TYPE
 // Since v0.12, VXRM(fixed-point rounding mode) is included in arguments of
 // fixed-point intrinsics.
 #define LIBYUV_RVV_HAS_VXRM_ARG
@@ -863,6 +862,12 @@ extern "C" {
 #define HAS_ARGBTOAR64ROW_RVV
 #define HAS_ARGBTOYJROW_RVV
 #define HAS_ARGBTOYMATRIXROW_RVV
+#define HAS_ARGBTOUV444MATRIXROW_RVV
+#define HAS_ARGBTOUVMATRIXROW_RVV
+#define HAS_ARGBTOUV444ROW_RVV
+#define HAS_ARGBTOUVJ444ROW_RVV
+#define HAS_ARGBTOUVROW_RVV
+#define HAS_ARGBTOUVJROW_RVV
 #define HAS_ARGBTOYROW_RVV
 #define HAS_BGRATOYROW_RVV
 #define HAS_COPYROW_RVV
@@ -892,8 +897,7 @@ extern "C" {
 // __riscv_vcreate_v_u8m2x3
 // __riscv_vcreate_v_u8m2x4
 // __riscv_vcreate_v_u8m4x2
-#if !defined(LIBYUV_RVV_HAS_TUPLE_TYPE) || \
-    (defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VCREATE))
+#ifdef LIBYUV_RVV_HAS_VCREATE
 #define HAS_AB64TOARGBROW_RVV
 #define HAS_AR64TOAB64ROW_RVV
 #define HAS_ARGBATTENUATEROW_RVV
@@ -1798,6 +1802,35 @@ void ABGRToYJRow_NEON_DotProd(const uint8_t* src_abgr,
                               uint8_t* dst_yj,
                               int width);
 void RGBAToYJRow_NEON_DotProd(const uint8_t* src_rgba,
                               uint8_t* dst_yj,
                               int width);
+void ARGBToUV444MatrixRow_RVV(const uint8_t* src_argb,
+                              uint8_t* dst_u,
+                              uint8_t* dst_v,
+                              int width,
+                              const struct ArgbConstants* c);
+void ARGBToUVMatrixRow_RVV(const uint8_t* src_argb,
+                           int src_stride_argb,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width,
+                           const struct ArgbConstants* c);
+void ARGBToUV444Row_RVV(const uint8_t* src_argb,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width);
+void ARGBToUVRow_RVV(const uint8_t* src_argb,
+                     int src_stride_argb,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width);
+void ARGBToUVJ444Row_RVV(const uint8_t* src_argb,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width);
+void ARGBToUVJRow_RVV(const uint8_t* src_argb,
+                      int src_stride_argb,
+                      uint8_t* dst_u,
+                      uint8_t* dst_v,
+                      int width);
 void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width);
 void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width);
 void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t*
dst_yj, int width); @@ -2143,6 +2176,10 @@ void ARGBToYMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c); +void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c); void ARGBToUVMatrixRow_C(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, diff --git a/source/convert.cc b/source/convert.cc index 07a58f602..79c1e16d6 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -2187,6 +2187,11 @@ int ARGBToI420Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -2195,6 +2200,12 @@ int ARGBToI420Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +// TODO(fbarchard): add AVX512BW #if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 7f7be08ea..9428f1439 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -108,6 +108,11 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUV444ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUV444Row = ARGBToUV444Row_RVV; + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -200,6 +205,11 @@ int ARGBToI444Matrix(const uint8_t* src_argb, uint8_t* dst_v, int width, const struct ArgbConstants* c) = ARGBToUV444MatrixRow_C; +#if defined(HAS_ARGBTOUV444MATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_RVV; + } +#endif #if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3; @@ -224,6 +234,12 @@ int ARGBToI444Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +// TODO(fbarchard): add AVX512BW #if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; @@ -360,6 +376,11 @@ int ARGBToI422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVRow = ARGBToUVRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; @@ -464,6 +485,11 @@ int ARGBToI422Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -472,6 +498,12 @@ int ARGBToI422Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +// TODO(fbarchard): add AVX512BW #if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; @@ -545,6 +577,11 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVRow = 
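    // Annotation (not part of the patch): the RVV rows loop on
    // __riscv_vsetvl and handle any width, so dispatch assigns them
    // directly; unlike the NEON/AVX2 paths there is no _Any_ wrapper.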
ARGBToUVRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; @@ -750,6 +787,11 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -758,6 +800,12 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +// TODO(fbarchard): add AVX512BW #if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; @@ -947,6 +995,11 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVRow = ARGBToUVRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; @@ -1618,6 +1671,11 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVRow = ARGBToUVRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; @@ -1842,6 +1900,11 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVRow = ARGBToUVRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; @@ -2814,6 +2877,11 @@ int ARGBToJ444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVJ444ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVJ444Row = ARGBToUVJ444Row_RVV; + } +#endif #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYJRow = ARGBToYJRow_Any_SSSE3; @@ -2930,6 +2998,11 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVJRow = ARGBToUVJRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVJRow = ARGBToUVJRow_Any_NEON; @@ -3134,6 +3207,11 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToUVJRow = ARGBToUVJRow_RVV; + } +#endif #if defined(HAS_ARGBTOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVJRow = ARGBToUVJRow_Any_NEON; diff --git a/source/row_rvv.cc b/source/row_rvv.cc index 0bdcd879b..84ea7a843 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -16,6 +16,7 @@ */ #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" // This module is for RVV (RISC-V Vector extension) #if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) @@ -126,7 +127,6 @@ extern "C" { } #endif -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE // Read [2*VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_uv #define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16) \ { \ @@ -170,45 +170,6 @@ extern "C" { v_y = __riscv_vle8_v_u8m2(src_y, vl); \ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ } -#else -// Read [2*VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_uv -#define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16) \ - { \ - vuint8m1_t v_tmp0, v_tmp1; \ - vuint8m2_t v_y; \ - vuint16m2_t v_u_16, v_v_16; \ - vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ - __riscv_vlseg2e8_v_u8m1(&v_tmp0, 
&v_tmp1, src_uv, vl); \ - v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ - v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ - v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ - v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ - v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ - v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ - vl = __riscv_vsetvl_e8m2(w); \ - v_y = __riscv_vle8_v_u8m2(src_y, vl); \ - v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ - } - -// Read 2*[VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_vu -#define READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16) \ - { \ - vuint8m1_t v_tmp0, v_tmp1; \ - vuint8m2_t v_y; \ - vuint16m2_t v_u_16, v_v_16; \ - vl = __riscv_vsetvl_e8m1((w + 1) / 2); \ - __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_vu, vl); \ - v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \ - v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \ - v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \ - v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \ - v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \ - v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \ - vl = __riscv_vsetvl_e8m2(w); \ - v_y = __riscv_vle8_v_u8m2(src_y, vl); \ - v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \ - } -#endif #ifdef HAS_ARGBTOAR64ROW_RVV void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { @@ -229,7 +190,6 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) { #endif #ifdef HAS_ARGBTOAB64ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { size_t avl = (size_t)width; do { @@ -256,29 +216,6 @@ void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { dst_ab64 += 4 * vl; } while (avl > 0); } -#else -void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) { - size_t avl = (size_t)width; - do { - vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16; - vuint8m1_t v_b, v_g, v_r, v_a; - size_t vl = __riscv_vsetvl_e8m1(avl); - __riscv_vlseg4e8_v_u8m1(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - v_b_16 = __riscv_vwaddu_vx_u16m2(v_b, 0, vl); - v_g_16 = __riscv_vwaddu_vx_u16m2(v_g, 0, vl); - v_r_16 = __riscv_vwaddu_vx_u16m2(v_r, 0, vl); - v_a_16 = __riscv_vwaddu_vx_u16m2(v_a, 0, vl); - v_b_16 = __riscv_vmul_vx_u16m2(v_b_16, 0x0101, vl); - v_g_16 = __riscv_vmul_vx_u16m2(v_g_16, 0x0101, vl); - v_r_16 = __riscv_vmul_vx_u16m2(v_r_16, 0x0101, vl); - v_a_16 = __riscv_vmul_vx_u16m2(v_a_16, 0x0101, vl); - __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r_16, v_g_16, v_b_16, v_a_16, vl); - avl -= vl; - src_argb += 4 * vl; - dst_ab64 += 4 * vl; - } while (avl > 0); -} -#endif #endif #ifdef HAS_AR64TOARGBROW_RVV @@ -299,7 +236,6 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) { #endif #ifdef HAS_AR64TOAB64ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) { @@ -318,26 +254,9 @@ void AR64ToAB64Row_RVV(const uint16_t* src_ar64, dst_ab64 += vl * 4; } while (w > 0); } -#else -void AR64ToAB64Row_RVV(const uint16_t* src_ar64, - uint16_t* dst_ab64, - int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e16m2(w); - vuint16m2_t v_b, v_g, v_r, v_a; - __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl); - __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl); - w -= vl; - src_ar64 += vl * 4; - dst_ab64 += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_AB64TOARGBROW_RVV -#ifdef 
LIBYUV_RVV_HAS_TUPLE_TYPE void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { size_t avl = (size_t)width; do { @@ -358,29 +277,9 @@ void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { dst_argb += 4 * vl; } while (avl > 0); } -#else -void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { - size_t avl = (size_t)width; - do { - vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16; - vuint8m1_t v_b, v_g, v_r, v_a; - size_t vl = __riscv_vsetvl_e16m2(avl); - __riscv_vlseg4e16_v_u16m2(&v_r_16, &v_g_16, &v_b_16, &v_a_16, src_ab64, vl); - v_b = __riscv_vnsrl_wx_u8m1(v_b_16, 8, vl); - v_g = __riscv_vnsrl_wx_u8m1(v_g_16, 8, vl); - v_r = __riscv_vnsrl_wx_u8m1(v_r_16, 8, vl); - v_a = __riscv_vnsrl_wx_u8m1(v_a_16, 8, vl); - __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl); - avl -= vl; - src_ab64 += 4 * vl; - dst_argb += 4 * vl; - } while (avl > 0); -} -#endif #endif #ifdef HAS_RAWTOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { size_t w = (size_t)width; size_t vl = __riscv_vsetvl_e8m2(w); @@ -398,26 +297,9 @@ void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } -#else -void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - vuint8m2_t v_b, v_g, v_r; - __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_raw += vl * 3; - dst_argb += vl * 4; - vl = __riscv_vsetvl_e8m2(w); - } while (w > 0); -} -#endif #endif #ifdef HAS_RAWTORGBAROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { size_t w = (size_t)width; size_t vl = __riscv_vsetvl_e8m2(w); @@ -435,26 +317,9 @@ void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } -#else -void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - vuint8m2_t v_b, v_g, v_r; - __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl); - __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); - w -= vl; - src_raw += vl * 3; - dst_rgba += vl * 4; - vl = __riscv_vsetvl_e8m2(w); - } while (w > 0); -} -#endif #endif #ifdef HAS_RAWTORGB24ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { size_t w = (size_t)width; do { @@ -470,24 +335,9 @@ void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { dst_rgb24 += vl * 3; } while (w > 0); } -#else -void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { - size_t w = (size_t)width; - do { - vuint8m2_t v_b, v_g, v_r; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl); - __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_r, v_g, v_b, vl); - w -= vl; - src_raw += vl * 3; - dst_rgb24 += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_ARGBTORAWROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) { size_t w = (size_t)width; do { @@ -503,24 +353,9 @@ void ARGBToRAWRow_RVV(const uint8_t* src_argb, 
uint8_t* dst_raw, int width) { dst_raw += vl * 3; } while (w > 0); } -#else -void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) { - size_t w = (size_t)width; - do { - vuint8m2_t v_b, v_g, v_r, v_a; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vsseg3e8_v_u8m2(dst_raw, v_r, v_g, v_b, vl); - w -= vl; - src_argb += vl * 4; - dst_raw += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_ARGBTORGB24ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width) { @@ -538,26 +373,9 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb, dst_rgb24 += vl * 3; } while (w > 0); } -#else -void ARGBToRGB24Row_RVV(const uint8_t* src_argb, - uint8_t* dst_rgb24, - int width) { - size_t w = (size_t)width; - do { - vuint8m2_t v_b, v_g, v_r, v_a; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); - w -= vl; - src_argb += vl * 4; - dst_rgb24 += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_ARGBTOABGRROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { size_t w = (size_t)width; do { @@ -574,24 +392,9 @@ void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { dst_abgr += vl * 4; } while (w > 0); } -#else -void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a, v_r, v_g, v_b; - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl); - w -= vl; - src_argb += vl * 4; - dst_abgr += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_ARGBTOBGRAROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { size_t w = (size_t)width; do { @@ -608,24 +411,9 @@ void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { dst_bgra += vl * 4; } while (w > 0); } -#else -void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a, v_r, v_g, v_b; - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl); - w -= vl; - src_argb += vl * 4; - dst_bgra += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_ARGBTORGBAROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { size_t w = (size_t)width; do { @@ -642,24 +430,9 @@ void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { dst_rgba += vl * 4; } while (w > 0); } -#else -void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a, v_r, v_g, v_b; - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); - w -= vl; - src_argb += vl * 4; - dst_rgba += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_RGBATOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { size_t w = (size_t)width; do { @@ -676,24 +449,9 @@ void RGBAToARGBRow_RVV(const uint8_t* 
src_rgba, uint8_t* dst_argb, int width) { dst_argb += vl * 4; } while (w > 0); } -#else -void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a, v_r, v_g, v_b; - __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_rgba += vl * 4; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_RGB24TOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { @@ -713,28 +471,9 @@ void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } -#else -void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - vuint8m2_t v_b, v_g, v_r; - __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb24, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_rgb24 += vl * 3; - dst_argb += vl * 4; - vl = __riscv_vsetvl_e8m2(w); - } while (w > 0); -} -#endif #endif #ifdef HAS_I444TOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void I444ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -765,40 +504,9 @@ void I444ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void I444ToARGBRow_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_y += vl; - src_u += vl; - src_v += vl; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_I444ALPHATOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void I444AlphaToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -831,42 +539,9 @@ void I444AlphaToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void I444AlphaToARGBRow_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - const uint8_t* src_a, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t vl; - size_t w = (size_t)width; - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - do { - READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - v_a = __riscv_vle8_v_u8m2(src_a, vl); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_y += vl; - src_a += vl; - src_u += vl; - src_v += vl; - dst_argb += vl * 4; - } 
while (w > 0); -} -#endif #endif #ifdef HAS_I444TORGB24ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void I444ToRGB24Row_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -896,39 +571,9 @@ void I444ToRGB24Row_RVV(const uint8_t* src_y, dst_rgb24 += vl * 3; } while (w > 0); } -#else -void I444ToRGB24Row_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - size_t vl; - size_t w = (size_t)width; - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - do { - READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); - w -= vl; - src_y += vl; - src_u += vl; - src_v += vl; - dst_rgb24 += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_I422TOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void I422ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -959,40 +604,9 @@ void I422ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void I422ToARGBRow_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_y += vl; - src_u += vl / 2; - src_v += vl / 2; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_I422ALPHATOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void I422AlphaToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1025,42 +639,9 @@ void I422AlphaToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void I422AlphaToARGBRow_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - const uint8_t* src_a, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t vl; - size_t w = (size_t)width; - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - do { - READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - v_a = __riscv_vle8_v_u8m2(src_a, vl); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_y += vl; - src_a += vl; - src_u += vl / 2; - src_v += vl / 2; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_I422TORGBAROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void 
I422ToRGBARow_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1091,40 +672,9 @@ void I422ToRGBARow_RVV(const uint8_t* src_y, dst_rgba += vl * 4; } while (w > 0); } -#else -void I422ToRGBARow_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgba, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); - w -= vl; - src_y += vl; - src_u += vl / 2; - src_v += vl / 2; - dst_rgba += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_I422TORGB24ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void I422ToRGB24Row_RVV(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1154,39 +704,10 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y, dst_rgb24 += vl * 3; } while (w > 0); } -#else -void I422ToRGB24Row_RVV(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - size_t vl; - size_t w = (size_t)width; - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - do { - READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); - w -= vl; - src_y += vl; - src_u += vl / 2; - src_v += vl / 2; - dst_rgb24 += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_I400TOARGBROW_RVV -#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG) +#ifdef LIBYUV_RVV_HAS_VXRM_ARG void I400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, @@ -1224,50 +745,10 @@ void I400ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void I400ToARGBRow_RVV(const uint8_t* src_y, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - const bool is_yb_positive = (yuvconstants->kRGBCoeffBias[4] >= 0); - vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); - vuint16m4_t v_yb; - vuint16m4_t v_yg = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[0], vl); - // To match behavior on other platforms, vxrm (fixed-point rounding mode - // register) sets to round-to-nearest-up mode(0). 
- asm volatile("csrwi vxrm, 0"); - if (is_yb_positive) { - v_yb = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[4] - 32, vl); - } else { - v_yb = __riscv_vmv_v_x_u16m4(-yuvconstants->kRGBCoeffBias[4] + 32, vl); - } - do { - vuint8m2_t v_y, v_out; - vuint16m4_t v_y_16, v_tmp0, v_tmp1, v_tmp2; - vl = __riscv_vsetvl_e8m2(w); - v_y = __riscv_vle8_v_u8m2(src_y, vl); - v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); - v_tmp0 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); // 257 * v_y - v_tmp1 = __riscv_vmulhu_vv_u16m4(v_tmp0, v_yg, vl); - if (is_yb_positive) { - v_tmp2 = __riscv_vsaddu_vv_u16m4(v_tmp1, v_yb, vl); - } else { - v_tmp2 = __riscv_vssubu_vv_u16m4(v_tmp1, v_yb, vl); - } - v_out = __riscv_vnclipu_wx_u8m2(v_tmp2, 6, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_out, v_out, v_out, v_a, vl); - w -= vl; - src_y += vl; - dst_argb += vl * 4; - } while (w > 0); -} #endif #endif #ifdef HAS_J400TOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) { size_t w = (size_t)width; size_t vl = __riscv_vsetvl_e8m2(w); @@ -1282,22 +763,6 @@ void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) { vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } -#else -void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - vuint8m2_t v_y; - v_y = __riscv_vle8_v_u8m2(src_y, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_y, v_y, v_y, v_a, vl); - w -= vl; - src_y += vl; - dst_argb += vl * 4; - vl = __riscv_vsetvl_e8m2(w); - } while (w > 0); -} -#endif #endif #ifdef HAS_COPYROW_RVV @@ -1315,7 +780,6 @@ void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) { #endif #ifdef HAS_NV12TOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void NV12ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, @@ -1344,38 +808,9 @@ void NV12ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void NV12ToARGBRow_RVV(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_y += vl; - src_uv += vl; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_NV12TORGB24ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void NV12ToRGB24Row_RVV(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, @@ -1403,37 +838,9 @@ void NV12ToRGB24Row_RVV(const uint8_t* src_y, dst_rgb24 += vl * 3; } while (w > 0); } -#else -void NV12ToRGB24Row_RVV(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - 
YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - do { - READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); - w -= vl; - src_y += vl; - src_uv += vl; - dst_rgb24 += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_NV21TOARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void NV21ToARGBRow_RVV(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_argb, @@ -1462,38 +869,9 @@ void NV21ToARGBRow_RVV(const uint8_t* src_y, dst_argb += vl * 4; } while (w > 0); } -#else -void NV21ToARGBRow_RVV(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r, v_a; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_y += vl; - src_vu += vl; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_NV21TORGB24ROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void NV21ToRGB24Row_RVV(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, @@ -1521,33 +899,6 @@ void NV21ToRGB24Row_RVV(const uint8_t* src_y, dst_rgb24 += vl * 3; } while (w > 0); } -#else -void NV21ToRGB24Row_RVV(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - uint8_t ub, vr, ug, vg; - int16_t yg, bb, bg, br; - vuint8m2_t v_u, v_v; - vuint8m2_t v_b, v_g, v_r; - vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16; - YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br); - do { - READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16); - YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, - v_b_16, v_r_16); - RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r); - __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl); - w -= vl; - src_y += vl; - src_vu += vl; - dst_rgb24 += vl * 3; - } while (w > 0); -} -#endif #endif // Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1 @@ -1667,7 +1018,6 @@ void InterpolateRow_RVV(uint8_t* dst_ptr, #endif #ifdef HAS_SPLITRGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void SplitRGBRow_RVV(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, @@ -1690,32 +1040,9 @@ void SplitRGBRow_RVV(const uint8_t* src_rgb, src_rgb += vl * 3; } while (w > 0); } -#else -void SplitRGBRow_RVV(const uint8_t* src_rgb, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - int width) { - size_t w = (size_t)width; - do { - vuint8m2_t v_b, v_g, v_r; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_rgb, vl); - __riscv_vse8_v_u8m2(dst_r, v_r, vl); - __riscv_vse8_v_u8m2(dst_g, v_g, vl); - __riscv_vse8_v_u8m2(dst_b, v_b, vl); - w -= vl; - dst_r += vl; - dst_g += vl; - dst_b += vl; - src_rgb += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_MERGERGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void 
MergeRGBRow_RVV(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, @@ -1736,31 +1063,9 @@ void MergeRGBRow_RVV(const uint8_t* src_r, dst_rgb += vl * 3; } while (w > 0); } -#else -void MergeRGBRow_RVV(const uint8_t* src_r, - const uint8_t* src_g, - const uint8_t* src_b, - uint8_t* dst_rgb, - int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl); - vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl); - vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl); - __riscv_vsseg3e8_v_u8m2(dst_rgb, v_r, v_g, v_b, vl); - w -= vl; - src_r += vl; - src_g += vl; - src_b += vl; - dst_rgb += vl * 3; - } while (w > 0); -} -#endif #endif #ifdef HAS_SPLITARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void SplitARGBRow_RVV(const uint8_t* src_argb, uint8_t* dst_r, uint8_t* dst_g, @@ -1787,35 +1092,9 @@ void SplitARGBRow_RVV(const uint8_t* src_argb, src_argb += vl * 4; } while (w > 0); } -#else -void SplitARGBRow_RVV(const uint8_t* src_argb, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - uint8_t* dst_a, - int width) { - size_t w = (size_t)width; - do { - vuint8m2_t v_b, v_g, v_r, v_a; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vse8_v_u8m2(dst_a, v_a, vl); - __riscv_vse8_v_u8m2(dst_r, v_r, vl); - __riscv_vse8_v_u8m2(dst_g, v_g, vl); - __riscv_vse8_v_u8m2(dst_b, v_b, vl); - w -= vl; - dst_a += vl; - dst_r += vl; - dst_g += vl; - dst_b += vl; - src_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_MERGEARGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void MergeARGBRow_RVV(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, @@ -1839,34 +1118,9 @@ void MergeARGBRow_RVV(const uint8_t* src_r, dst_argb += vl * 4; } while (w > 0); } -#else -void MergeARGBRow_RVV(const uint8_t* src_r, - const uint8_t* src_g, - const uint8_t* src_b, - const uint8_t* src_a, - uint8_t* dst_argb, - int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl); - vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl); - vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl); - vuint8m2_t v_a = __riscv_vle8_v_u8m2(src_a, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_r += vl; - src_g += vl; - src_b += vl; - src_a += vl; - dst_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_SPLITXRGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void SplitXRGBRow_RVV(const uint8_t* src_argb, uint8_t* dst_r, uint8_t* dst_g, @@ -1889,32 +1143,9 @@ void SplitXRGBRow_RVV(const uint8_t* src_argb, src_argb += vl * 4; } while (w > 0); } -#else -void SplitXRGBRow_RVV(const uint8_t* src_argb, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - int width) { - size_t w = (size_t)width; - do { - vuint8m2_t v_b, v_g, v_r, v_a; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - __riscv_vse8_v_u8m2(dst_r, v_r, vl); - __riscv_vse8_v_u8m2(dst_g, v_g, vl); - __riscv_vse8_v_u8m2(dst_b, v_b, vl); - w -= vl; - dst_r += vl; - dst_g += vl; - dst_b += vl; - src_argb += vl * 4; - } while (w > 0); -} -#endif #endif #ifdef HAS_MERGEXRGBROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void MergeXRGBRow_RVV(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, @@ -1937,34 +1168,9 @@ void MergeXRGBRow_RVV(const uint8_t* src_r, vl = __riscv_vsetvl_e8m2(w); } while (w > 0); } -#else -void MergeXRGBRow_RVV(const uint8_t* src_r, - const 
uint8_t* src_g, - const uint8_t* src_b, - uint8_t* dst_argb, - int width) { - size_t w = (size_t)width; - size_t vl = __riscv_vsetvl_e8m2(w); - vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); - do { - vuint8m2_t v_r, v_g, v_b; - v_r = __riscv_vle8_v_u8m2(src_r, vl); - v_g = __riscv_vle8_v_u8m2(src_g, vl); - v_b = __riscv_vle8_v_u8m2(src_b, vl); - __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); - w -= vl; - src_r += vl; - src_g += vl; - src_b += vl; - dst_argb += vl * 4; - vl = __riscv_vsetvl_e8m2(w); - } while (w > 0); -} -#endif #endif #ifdef HAS_SPLITUVROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void SplitUVRow_RVV(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, @@ -1983,29 +1189,9 @@ void SplitUVRow_RVV(const uint8_t* src_uv, src_uv += 2 * vl; } while (w > 0); } -#else -void SplitUVRow_RVV(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - size_t w = (size_t)width; - do { - size_t vl = __riscv_vsetvl_e8m4(w); - vuint8m4_t v_u, v_v; - __riscv_vlseg2e8_v_u8m4(&v_u, &v_v, src_uv, vl); - __riscv_vse8_v_u8m4(dst_u, v_u, vl); - __riscv_vse8_v_u8m4(dst_v, v_v, vl); - w -= vl; - dst_u += vl; - dst_v += vl; - src_uv += 2 * vl; - } while (w > 0); -} -#endif #endif #ifdef HAS_MERGEUVROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE void MergeUVRow_RVV(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, @@ -2023,74 +1209,24 @@ void MergeUVRow_RVV(const uint8_t* src_u, dst_uv += 2 * vl; } while (w > 0); } -#else -void MergeUVRow_RVV(const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uv, - int width) { - size_t w = (size_t)width; - do { - vuint8m4_t v_u, v_v; - size_t vl = __riscv_vsetvl_e8m4(w); - v_u = __riscv_vle8_v_u8m4(src_u, vl); - v_v = __riscv_vle8_v_u8m4(src_v, vl); - __riscv_vsseg2e8_v_u8m4(dst_uv, v_u, v_v, vl); - w -= vl; - src_u += vl; - src_v += vl; - dst_uv += 2 * vl; - } while (w > 0); -} -#endif #endif -struct RgbConstants { - uint8_t kRGBToY[4]; - uint16_t kAddY; - uint16_t pad; -}; - -// RGB to JPeg coefficients -// B * 0.1140 coefficient = 29 -// G * 0.5870 coefficient = 150 -// R * 0.2990 coefficient = 77 -// Add 0.5 = 0x80 -static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; - -static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; - -// RGB to BT.601 coefficients -// B * 0.1016 coefficient = 25 -// G * 0.5078 coefficient = 129 -// R * 0.2578 coefficient = 66 -// Add 16.5 = 0x1080 - -static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; - -static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; // ARGB expects first 3 values to contain RGB and 4th value is ignored #ifdef HAS_ARGBTOYMATRIXROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE -static void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, +void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct RgbConstants* rgbconstants) { + const struct ArgbConstants* c) { assert(width != 0); size_t w = (size_t)width; vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant vuint16m4_t v_addy; // vector is to store kAddY size_t vl = __riscv_vsetvl_e8m2(w); - v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); - v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); - v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); - v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); + v_by = __riscv_vmv_v_x_u8m2(c->kRGBToY[0], vl); + v_gy = __riscv_vmv_v_x_u8m2(c->kRGBToY[1], vl); + v_ry = 
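  // Annotation (not part of the patch): ArgbConstants, shared with the C
  // reference rows, replaces the local RgbConstants struct removed below;
  // it carries the Y bias as an array, hence kAddY[0] here.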
__riscv_vmv_v_x_u8m2(c->kRGBToY[2], vl); + v_addy = __riscv_vmv_v_x_u16m4(c->kAddY[0], vl); do { vuint8m2_t v_y; vuint16m4_t v_y_u16; @@ -2110,79 +1246,47 @@ static void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, dst_y += vl; } while (w > 0); } -#else -static void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct RgbConstants* rgbconstants) { - assert(width != 0); - size_t w = (size_t)width; - vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant - vuint16m4_t v_addy; // vector is to store kAddY - size_t vl = __riscv_vsetvl_e8m2(w); - v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); - v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); - v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); - v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); - do { - vuint8m2_t v_b, v_g, v_r, v_a, v_y; - vuint16m4_t v_y_u16; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); - v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl); - v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl); - v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl); - v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl); - v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl); - __riscv_vse8_v_u8m2(dst_y, v_y, vl); - w -= vl; - src_argb += 4 * vl; - dst_y += vl; - } while (w > 0); -} -#endif #endif #ifdef HAS_ARGBTOYROW_RVV void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants); + ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kArgbI601Constants); } #endif #ifdef HAS_ARGBTOYJROW_RVV void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants); + ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kArgbJPEGConstants); } #endif #ifdef HAS_ABGRTOYROW_RVV void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants); + ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kAbgrI601Constants); } #endif #ifdef HAS_ABGRTOYJROW_RVV void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants); + ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kAbgrJPEGConstants); } #endif // RGBA expects first value to be A and ignored, then 3 values to contain RGB. 
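// Illustration (not part of the patch): a scalar sketch of the per-pixel sum
// the *ToYMatrixRow_RVV kernels above evaluate with vwmulu/vwmaccu and a
// final vnsrl by 8. The constants are the BT.601 values from the RgbConstants
// tables this patch removes (coefficients {25, 129, 66}, bias 0x1080); the
// JPEG variant uses {29, 150, 77} with bias 128.
static inline uint8_t RGBToY_I601(uint8_t r, uint8_t g, uint8_t b) {
  // 16-bit accumulation cannot overflow: 220 * 255 + 0x1080 < 65536.
  return (uint8_t)((25 * b + 129 * g + 66 * r + 0x1080) >> 8);
}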
#ifdef HAS_RGBATOYMATRIXROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE static void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width, - const struct RgbConstants* rgbconstants) { + const struct ArgbConstants* c) { assert(width != 0); size_t w = (size_t)width; vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant vuint16m4_t v_addy; // vector is to store kAddY size_t vl = __riscv_vsetvl_e8m2(w); - v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); - v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); - v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); - v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); + v_by = __riscv_vmv_v_x_u8m2(c->kRGBToY[0], vl); + v_gy = __riscv_vmv_v_x_u8m2(c->kRGBToY[1], vl); + v_ry = __riscv_vmv_v_x_u8m2(c->kRGBToY[2], vl); + v_addy = __riscv_vmv_v_x_u16m4(c->kAddY[0], vl); do { vuint8m2_t v_y; vuint16m4_t v_y_u16; @@ -2202,72 +1306,40 @@ static void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba, dst_y += vl; } while (w > 0); } -#else -static void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba, - uint8_t* dst_y, - int width, - const struct RgbConstants* rgbconstants) { - assert(width != 0); - size_t w = (size_t)width; - vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant - vuint16m4_t v_addy; // vector is to store kAddY - size_t vl = __riscv_vsetvl_e8m2(w); - v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); - v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); - v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); - v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); - do { - vuint8m2_t v_b, v_g, v_r, v_a, v_y; - vuint16m4_t v_y_u16; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl); - v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl); - v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl); - v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl); - v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl); - v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl); - __riscv_vse8_v_u8m2(dst_y, v_y, vl); - w -= vl; - src_rgba += 4 * vl; - dst_y += vl; - } while (w > 0); -} -#endif #endif #ifdef HAS_RGBATOYROW_RVV void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants); + RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kArgbI601Constants); } #endif #ifdef HAS_RGBATOYJROW_RVV void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants); + RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kArgbJPEGConstants); } #endif #ifdef HAS_BGRATOYROW_RVV void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants); + RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kAbgrI601Constants); } #endif #ifdef HAS_RGBTOYMATRIXROW_RVV -#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE static void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, uint8_t* dst_y, int width, - const struct RgbConstants* rgbconstants) { + const struct ArgbConstants* c) { assert(width != 0); size_t w = (size_t)width; vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant vuint16m4_t v_addy; // vector is to store kAddY size_t vl = __riscv_vsetvl_e8m2(w); - v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); - v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); - v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); - 
v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); + v_by = __riscv_vmv_v_x_u8m2(c->kRGBToY[0], vl); + v_gy = __riscv_vmv_v_x_u8m2(c->kRGBToY[1], vl); + v_ry = __riscv_vmv_v_x_u8m2(c->kRGBToY[2], vl); + v_addy = __riscv_vmv_v_x_u16m4(c->kAddY[0], vl); do { vuint8m2_t v_y; vuint16m4_t v_y_u16; @@ -2287,60 +1359,29 @@ static void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, dst_y += vl; } while (w > 0); } -#else -static void RGBToYMatrixRow_RVV(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct RgbConstants* rgbconstants) { - assert(width != 0); - size_t w = (size_t)width; - vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant - vuint16m4_t v_addy; // vector is to store kAddY - size_t vl = __riscv_vsetvl_e8m2(w); - v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl); - v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl); - v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl); - v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl); - do { - vuint8m2_t v_b, v_g, v_r, v_y; - vuint16m4_t v_y_u16; - size_t vl = __riscv_vsetvl_e8m2(w); - __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb, vl); - v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl); - v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl); - v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl); - v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl); - v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl); - __riscv_vse8_v_u8m2(dst_y, v_y, vl); - w -= vl; - src_rgb += 3 * vl; - dst_y += vl; - } while (w > 0); -} -#endif #endif #ifdef HAS_RGB24TOYJROW_RVV void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { - RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants); + RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kArgbJPEGConstants); } #endif #ifdef HAS_RAWTOYJROW_RVV void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) { - RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants); + RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kAbgrJPEGConstants); } #endif #ifdef HAS_RGB24TOYROW_RVV void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { - RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants); + RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kArgbI601Constants); } #endif #ifdef HAS_RAWTOYROW_RVV void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) { - RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants); + RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kAbgrI601Constants); } #endif @@ -2348,7 +1389,6 @@ void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) { // dst_argb may be src_argb or src_argb1. // src_argb: RGB values have already been pre-multiplied by the a. 
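// Illustration (not part of the patch): the per-channel identity the
// ARGBBlendRow_RVV kernel below relies on, with f = premultiplied foreground,
// b = background and a = foreground alpha (up to rounding, as the in-kernel
// comment notes):
//   clamp255((((256 - a) * b) >> 8) + f) == sat(b - ((b * a) >> 8) + f)
static inline uint8_t BlendChannel(uint8_t f, uint8_t b, uint8_t a) {
  // vmulhu computes (b * a) >> 8; vsub and vsaddu mirror the subtract and
  // the saturating add.
  int v = b - ((b * a) >> 8) + f;
  return (uint8_t)(v > 255 ? 255 : v);
}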
 #ifdef HAS_ARGBBLENDROW_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ARGBBlendRow_RVV(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
@@ -2395,48 +1435,6 @@ void ARGBBlendRow_RVV(const uint8_t* src_argb,
     dst_argb += 4 * vl;
   } while (w > 0);
 }
-#else
-void ARGBBlendRow_RVV(const uint8_t* src_argb,
-                      const uint8_t* src_argb1,
-                      uint8_t* dst_argb,
-                      int width) {
-  size_t w = (size_t)width;
-  size_t vl = __riscv_vsetvlmax_e8m2();
-  // clamp255((((256 - a) * b) >> 8) + f)
-  // = b * (256 - a) / 256 + f
-  // = b - (b * a / 256) + f
-  vuint8m2_t v_255 = __riscv_vmv_v_x_u8m2(255, vl);
-  do {
-    vuint8m2_t v_src0_b, v_src0_g, v_src0_r, v_src0_a;
-    vuint8m2_t v_src1_b, v_src1_g, v_src1_r, v_src1_a;
-    vuint8m2_t v_tmp_b, v_tmp_g, v_tmp_r;
-    vuint8m2_t v_dst_b, v_dst_g, v_dst_r;
-    vl = __riscv_vsetvl_e8m2(w);
-    __riscv_vlseg4e8_v_u8m2(&v_src0_b, &v_src0_g, &v_src0_r, &v_src0_a,
-                            src_argb, vl);
-    __riscv_vlseg4e8_v_u8m2(&v_src1_b, &v_src1_g, &v_src1_r, &v_src1_a,
-                            src_argb1, vl);
-
-    v_tmp_b = __riscv_vmulhu_vv_u8m2(v_src1_b, v_src0_a, vl);
-    v_tmp_g = __riscv_vmulhu_vv_u8m2(v_src1_g, v_src0_a, vl);
-    v_tmp_r = __riscv_vmulhu_vv_u8m2(v_src1_r, v_src0_a, vl);
-
-    v_dst_b = __riscv_vsub_vv_u8m2(v_src1_b, v_tmp_b, vl);
-    v_dst_g = __riscv_vsub_vv_u8m2(v_src1_g, v_tmp_g, vl);
-    v_dst_r = __riscv_vsub_vv_u8m2(v_src1_r, v_tmp_r, vl);
-
-    v_dst_b = __riscv_vsaddu_vv_u8m2(v_dst_b, v_src0_b, vl);
-    v_dst_g = __riscv_vsaddu_vv_u8m2(v_dst_g, v_src0_g, vl);
-    v_dst_r = __riscv_vsaddu_vv_u8m2(v_dst_r, v_src0_r, vl);
-    __riscv_vsseg4e8_v_u8m2(dst_argb, v_dst_b, v_dst_g, v_dst_r, v_255, vl);
-
-    w -= vl;
-    src_argb += 4 * vl;
-    src_argb1 += 4 * vl;
-    dst_argb += 4 * vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_BLENDPLANEROW_RVV
@@ -2474,7 +1472,6 @@ void BlendPlaneRow_RVV(const uint8_t* src0,
 
 // Attenuate: (f * a + 255) >> 8
 #ifdef HAS_ARGBATTENUATEROW_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int width) {
@@ -2508,39 +1505,9 @@ void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
     dst_argb += vl * 4;
   } while (w > 0);
 }
-#else
-void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
-                          uint8_t* dst_argb,
-                          int width) {
-  size_t w = (size_t)width;
-  do {
-    vuint8m2_t v_b, v_g, v_r, v_a;
-    vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
-    size_t vl = __riscv_vsetvl_e8m2(w);
-    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
-    // f * a
-    v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
-    v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
-    v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
-    // f * a + 255
-    v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl);
-    v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl);
-    v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl);
-    // (f * a + 255) >> 8
-    v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl);
-    v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl);
-    v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl);
-    __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
-    w -= vl;
-    src_argb += vl * 4;
-    dst_argb += vl * 4;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_ARGBEXTRACTALPHAROW_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
                              uint8_t* dst_a,
                              int width) {
@@ -2555,22 +1522,6 @@ void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
     dst_a += vl;
   } while (w > 0);
 }
-#else
-void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
-                             uint8_t* dst_a,
-                             int width) {
-  size_t w = (size_t)width;
-  do {
-    size_t vl = __riscv_vsetvl_e8m2(w);
-    vuint8m2_t v_b, v_g, v_r, v_a;
-    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
-    __riscv_vse8_v_u8m2(dst_a, v_a, vl);
-    w -= vl;
-    src_argb += vl * 4;
-    dst_a += vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_ARGBCOPYYTOALPHAROW_RVV
@@ -2589,6 +1540,195 @@ void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
 }
 #endif
 
+
+#ifdef HAS_ARGBTOUV444ROW_RVV
+void ARGBToUV444MatrixRow_RVV(const uint8_t* src_argb,
+                              uint8_t* dst_u,
+                              uint8_t* dst_v,
+                              int width,
+                              const struct ArgbConstants* c) {
+  size_t w = (size_t)width;
+  do {
+    size_t vl = __riscv_vsetvl_e8m2(w);
+    vuint8m2x4_t v_src_argb = __riscv_vlseg4e8_v_u8m2x4(src_argb, vl);
+    vuint8m2_t v_b = __riscv_vget_v_u8m2x4_u8m2(v_src_argb, 0);
+    vuint8m2_t v_g = __riscv_vget_v_u8m2x4_u8m2(v_src_argb, 1);
+    vuint8m2_t v_r = __riscv_vget_v_u8m2x4_u8m2(v_src_argb, 2);
+
+    vint16m4_t v_b_16 = __riscv_vreinterpret_v_u16m4_i16m4(
+        __riscv_vwaddu_vx_u16m4(v_b, 0, vl));
+    vint16m4_t v_g_16 = __riscv_vreinterpret_v_u16m4_i16m4(
+        __riscv_vwaddu_vx_u16m4(v_g, 0, vl));
+    vint16m4_t v_r_16 = __riscv_vreinterpret_v_u16m4_i16m4(
+        __riscv_vwaddu_vx_u16m4(v_r, 0, vl));
+
+    vint16m4_t v_u_16 = __riscv_vmv_v_x_i16m4(c->kAddUV[0], vl);
+    v_u_16 = __riscv_vnmsac_vx_i16m4(v_u_16, c->kRGBToU[2], v_r_16, vl);
+    v_u_16 = __riscv_vnmsac_vx_i16m4(v_u_16, c->kRGBToU[1], v_g_16, vl);
+    v_u_16 = __riscv_vnmsac_vx_i16m4(v_u_16, c->kRGBToU[0], v_b_16, vl);
+    vuint8m2_t v_u = __riscv_vnsrl_wx_u8m2(
+        __riscv_vreinterpret_v_i16m4_u16m4(v_u_16), 8, vl);
+
+    vint16m4_t v_v_16 = __riscv_vmv_v_x_i16m4(c->kAddUV[0], vl);
+    v_v_16 = __riscv_vnmsac_vx_i16m4(v_v_16, c->kRGBToV[2], v_r_16, vl);
+    v_v_16 = __riscv_vnmsac_vx_i16m4(v_v_16, c->kRGBToV[1], v_g_16, vl);
+    v_v_16 = __riscv_vnmsac_vx_i16m4(v_v_16, c->kRGBToV[0], v_b_16, vl);
+    vuint8m2_t v_v = __riscv_vnsrl_wx_u8m2(
+        __riscv_vreinterpret_v_i16m4_u16m4(v_v_16), 8, vl);
+
+    __riscv_vse8_v_u8m2(dst_u, v_u, vl);
+    __riscv_vse8_v_u8m2(dst_v, v_v, vl);
+
+    w -= vl;
+    src_argb += 4 * vl;
+    dst_u += vl;
+    dst_v += vl;
+  } while (w > 0);
+}
+
+void ARGBToUV444Row_RVV(const uint8_t* src_argb,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width) {
+  ARGBToUV444MatrixRow_RVV(src_argb, dst_u, dst_v, width, &kArgbI601Constants);
+}
+
+void ARGBToUVJ444Row_RVV(const uint8_t* src_argb,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width) {
+  ARGBToUV444MatrixRow_RVV(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants);
+}
+#endif
+
+
+#ifdef HAS_ARGBTOUVMATRIXROW_RVV
+void ARGBToUVMatrixRow_RVV(const uint8_t* src_argb,
+                           int src_stride_argb,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width,
+                           const struct ArgbConstants* c) {
+  const uint8_t* src_argb1 = src_argb + src_stride_argb;
+  size_t w = (size_t)(width / 2);
+  if (w > 0) {
+    do {
+      size_t vl = __riscv_vsetvl_e8m1(w);
+      vuint8m1x8_t v_src = __riscv_vlseg8e8_v_u8m1x8(src_argb, vl);
+      vuint8m1x8_t v_src1 = __riscv_vlseg8e8_v_u8m1x8(src_argb1, vl);
+
+      vuint8m1_t v_b0 = __riscv_vget_v_u8m1x8_u8m1(v_src, 0);
+      vuint8m1_t v_g0 = __riscv_vget_v_u8m1x8_u8m1(v_src, 1);
+      vuint8m1_t v_r0 = __riscv_vget_v_u8m1x8_u8m1(v_src, 2);
+      vuint8m1_t v_b1 = __riscv_vget_v_u8m1x8_u8m1(v_src, 4);
+      vuint8m1_t v_g1 = __riscv_vget_v_u8m1x8_u8m1(v_src, 5);
+      vuint8m1_t v_r1 = __riscv_vget_v_u8m1x8_u8m1(v_src, 6);
+
+      vuint8m1_t v_b0_1 = __riscv_vget_v_u8m1x8_u8m1(v_src1, 0);
+      vuint8m1_t v_g0_1 = __riscv_vget_v_u8m1x8_u8m1(v_src1, 1);
+      vuint8m1_t v_r0_1 = __riscv_vget_v_u8m1x8_u8m1(v_src1, 2);
+      vuint8m1_t v_b1_1 = __riscv_vget_v_u8m1x8_u8m1(v_src1, 4);
+      vuint8m1_t v_g1_1 = __riscv_vget_v_u8m1x8_u8m1(v_src1, 5);
+      vuint8m1_t v_r1_1 = __riscv_vget_v_u8m1x8_u8m1(v_src1, 6);
+
+      vuint16m2_t v_sum_b = __riscv_vwaddu_vv_u16m2(v_b0, v_b1, vl);
+      vuint16m2_t v_sum_b1 = __riscv_vwaddu_vv_u16m2(v_b0_1, v_b1_1, vl);
+      v_sum_b = __riscv_vadd_vv_u16m2(v_sum_b, v_sum_b1, vl);
+      vuint8m1_t v_ab =
+          __riscv_vnclipu_wx_u8m1(v_sum_b, 2, __RISCV_VXRM_RNU, vl);
+
+      vuint16m2_t v_sum_g = __riscv_vwaddu_vv_u16m2(v_g0, v_g1, vl);
+      vuint16m2_t v_sum_g1 = __riscv_vwaddu_vv_u16m2(v_g0_1, v_g1_1, vl);
+      v_sum_g = __riscv_vadd_vv_u16m2(v_sum_g, v_sum_g1, vl);
+      vuint8m1_t v_ag =
+          __riscv_vnclipu_wx_u8m1(v_sum_g, 2, __RISCV_VXRM_RNU, vl);
+
+      vuint16m2_t v_sum_r = __riscv_vwaddu_vv_u16m2(v_r0, v_r1, vl);
+      vuint16m2_t v_sum_r1 = __riscv_vwaddu_vv_u16m2(v_r0_1, v_r1_1, vl);
+      v_sum_r = __riscv_vadd_vv_u16m2(v_sum_r, v_sum_r1, vl);
+      vuint8m1_t v_ar =
+          __riscv_vnclipu_wx_u8m1(v_sum_r, 2, __RISCV_VXRM_RNU, vl);
+
+      vint16m2_t v_b_16 = __riscv_vreinterpret_v_u16m2_i16m2(
+          __riscv_vwaddu_vx_u16m2(v_ab, 0, vl));
+      vint16m2_t v_g_16 = __riscv_vreinterpret_v_u16m2_i16m2(
+          __riscv_vwaddu_vx_u16m2(v_ag, 0, vl));
+      vint16m2_t v_r_16 = __riscv_vreinterpret_v_u16m2_i16m2(
+          __riscv_vwaddu_vx_u16m2(v_ar, 0, vl));
+
+      vint16m2_t v_u_16 = __riscv_vmv_v_x_i16m2(c->kAddUV[0], vl);
+      v_u_16 = __riscv_vnmsac_vx_i16m2(v_u_16, c->kRGBToU[2], v_r_16, vl);
+      v_u_16 = __riscv_vnmsac_vx_i16m2(v_u_16, c->kRGBToU[1], v_g_16, vl);
+      v_u_16 = __riscv_vnmsac_vx_i16m2(v_u_16, c->kRGBToU[0], v_b_16, vl);
+      vuint8m1_t v_u = __riscv_vnsrl_wx_u8m1(
+          __riscv_vreinterpret_v_i16m2_u16m2(v_u_16), 8, vl);
+
+      vint16m2_t v_v_16 = __riscv_vmv_v_x_i16m2(c->kAddUV[0], vl);
+      v_v_16 = __riscv_vnmsac_vx_i16m2(v_v_16, c->kRGBToV[2], v_r_16, vl);
+      v_v_16 = __riscv_vnmsac_vx_i16m2(v_v_16, c->kRGBToV[1], v_g_16, vl);
+      v_v_16 = __riscv_vnmsac_vx_i16m2(v_v_16, c->kRGBToV[0], v_b_16, vl);
+      vuint8m1_t v_v = __riscv_vnsrl_wx_u8m1(
+          __riscv_vreinterpret_v_i16m2_u16m2(v_v_16), 8, vl);
+
+      __riscv_vse8_v_u8m1(dst_u, v_u, vl);
+      __riscv_vse8_v_u8m1(dst_v, v_v, vl);
+
+      w -= vl;
+      src_argb += 8 * vl;
+      src_argb1 += 8 * vl;
+      dst_u += vl;
+      dst_v += vl;
+    } while (w > 0);
+  }
+  if (width & 1) {
+    size_t vl = 1;
+    vuint8m1_t v_b0 = __riscv_vle8_v_u8m1(&src_argb[0], vl);
+    vuint8m1_t v_g0 = __riscv_vle8_v_u8m1(&src_argb[1], vl);
+    vuint8m1_t v_r0 = __riscv_vle8_v_u8m1(&src_argb[2], vl);
+
+    vuint8m1_t v_b0_1 = __riscv_vle8_v_u8m1(&src_argb1[0], vl);
+    vuint8m1_t v_g0_1 = __riscv_vle8_v_u8m1(&src_argb1[1], vl);
+    vuint8m1_t v_r0_1 = __riscv_vle8_v_u8m1(&src_argb1[2], vl);
+
+    vuint16m2_t v_sum_b = __riscv_vwaddu_vv_u16m2(v_b0, v_b0_1, vl);
+    vuint8m1_t v_ab = __riscv_vnclipu_wx_u8m1(v_sum_b, 1, __RISCV_VXRM_RNU, vl);
+
+    vuint16m2_t v_sum_g = __riscv_vwaddu_vv_u16m2(v_g0, v_g0_1, vl);
+    vuint8m1_t v_ag = __riscv_vnclipu_wx_u8m1(v_sum_g, 1, __RISCV_VXRM_RNU, vl);
+
+    vuint16m2_t v_sum_r = __riscv_vwaddu_vv_u16m2(v_r0, v_r0_1, vl);
+    vuint8m1_t v_ar = __riscv_vnclipu_wx_u8m1(v_sum_r, 1, __RISCV_VXRM_RNU, vl);
+
+    vint16m2_t v_b_16 = __riscv_vreinterpret_v_u16m2_i16m2(
+        __riscv_vwaddu_vx_u16m2(v_ab, 0, vl));
+    vint16m2_t v_g_16 = __riscv_vreinterpret_v_u16m2_i16m2(
+        __riscv_vwaddu_vx_u16m2(v_ag, 0, vl));
+    vint16m2_t v_r_16 = __riscv_vreinterpret_v_u16m2_i16m2(
+        __riscv_vwaddu_vx_u16m2(v_ar, 0, vl));
+
+    vint16m2_t v_u_16 = __riscv_vmv_v_x_i16m2(c->kAddUV[0], vl);
+    v_u_16 = __riscv_vnmsac_vx_i16m2(v_u_16, c->kRGBToU[2], v_r_16, vl);
+    v_u_16 = __riscv_vnmsac_vx_i16m2(v_u_16, c->kRGBToU[1], v_g_16, vl);
+    v_u_16 = __riscv_vnmsac_vx_i16m2(v_u_16, c->kRGBToU[0], v_b_16, vl);
+    vuint8m1_t v_u = __riscv_vnsrl_wx_u8m1(
+        __riscv_vreinterpret_v_i16m2_u16m2(v_u_16), 8, vl);
+
+    vint16m2_t v_v_16 = __riscv_vmv_v_x_i16m2(c->kAddUV[0], vl);
+    v_v_16 = __riscv_vnmsac_vx_i16m2(v_v_16, c->kRGBToV[2], v_r_16, vl);
+    v_v_16 = __riscv_vnmsac_vx_i16m2(v_v_16, c->kRGBToV[1], v_g_16, vl);
+    v_v_16 = __riscv_vnmsac_vx_i16m2(v_v_16, c->kRGBToV[0], v_b_16, vl);
+    vuint8m1_t v_v = __riscv_vnsrl_wx_u8m1(
+        __riscv_vreinterpret_v_i16m2_u16m2(v_v_16), 8, vl);
+
+    __riscv_vse8_v_u8m1(dst_u, v_u, vl);
+    __riscv_vse8_v_u8m1(dst_v, v_v, vl);
+  }
+}
+#endif
+
+#ifdef HAS_ARGBTOUVROW_RVV
+void ARGBToUVRow_RVV(const uint8_t* src_argb,
+                     int src_stride_argb,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width) {
+  ARGBToUVMatrixRow_RVV(src_argb, src_stride_argb, dst_u, dst_v, width,
+                        &kArgbI601Constants);
+}
+
+void ARGBToUVJRow_RVV(const uint8_t* src_argb,
+                      int src_stride_argb,
+                      uint8_t* dst_u,
+                      uint8_t* dst_v,
+                      int width) {
+  ARGBToUVMatrixRow_RVV(src_argb, src_stride_argb, dst_u, dst_v, width,
+                        &kArgbJPEGConstants);
+}
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc
index 60310e319..3260f24d5 100644
--- a/source/scale_rvv.cc
+++ b/source/scale_rvv.cc
@@ -116,7 +116,7 @@ void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
 #endif
 
 #ifdef HAS_SCALEARGBROWDOWN2LINEAR_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
                                  ptrdiff_t src_stride,
                                  uint8_t* dst_argb,
@@ -139,37 +139,11 @@ void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
     dst_argb += vl * 4;
   } while (w > 0);
 }
-#else
-void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
-                                 ptrdiff_t src_stride,
-                                 uint8_t* dst_argb,
-                                 int dst_width) {
-  (void)src_stride;
-  size_t w = (size_t)dst_width;
-  const uint32_t* src = (const uint32_t*)(src_argb);
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m4_t v_odd, v_even, v_dst;
-    vuint32m4_t v_odd_32, v_even_32;
-    size_t vl = __riscv_vsetvl_e32m4(w);
-    __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl);
-    v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32);
-    v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32);
-    // Use round-to-nearest-up mode for averaging add
-    v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4);
-    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
-    w -= vl;
-    src += vl * 2;
-    dst_argb += vl * 4;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEARGBROWDOWN2BOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
@@ -204,40 +178,6 @@ void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
     dst_argb += vl * 4;
   } while (w > 0);
 }
-#else
-void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
-                              ptrdiff_t src_stride,
-                              uint8_t* dst_argb,
-                              int dst_width) {
-  size_t w = (size_t)dst_width;
-  const uint32_t* src0 = (const uint32_t*)(src_argb);
-  const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
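// Reviewer note on the scale_rvv.cc deletions that follow: the pre-v0.12
// fallbacks programmed the rounding mode globally before the loop, e.g.
//   asm volatile("csrwi vxrm, 0");  // 0 == round-to-nearest-up (RNU)
// because the fixed-point intrinsics took no VXRM argument. The retained
// LIBYUV_RVV_HAS_VXRM_ARG paths pass the mode per call instead, e.g.
//   __riscv_vnclipu_wx_u8m1(v_sum, 2, __RISCV_VXRM_RNU, vl);
// which narrows a 2x2 sum as (sum + 2) >> 2 (a sum of 10 becomes 3).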
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst;
-    vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16;
-    vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32;
-    size_t vl = __riscv_vsetvl_e32m4(w);
-    __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl);
-    __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl);
-    v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32);
-    v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32);
-    v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32);
-    v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32);
-    v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4);
-    v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4);
-    v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
-    // Use round-to-nearest-up mode for vnclip
-    v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4);
-    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
-    w -= vl;
-    src0 += vl * 2;
-    src1 += vl * 2;
-    dst_argb += vl * 4;
-  } while (w > 0);
-}
 #endif
 #endif
 
@@ -263,7 +203,7 @@ void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
 #endif
 
 #ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
                                  ptrdiff_t src_stride,
                                  int src_stepx,
@@ -299,44 +239,6 @@ void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
     dst_argb += vl * 4;
   } while (w > 0);
 }
-#else
-void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
-                                 ptrdiff_t src_stride,
-                                 int src_stepx,
-                                 uint8_t* dst_argb,
-                                 int dst_width) {
-  size_t w = (size_t)dst_width;
-  const uint32_t* src0 = (const uint32_t*)(src_argb);
-  const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
-  const int stride_byte = src_stepx * 4;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst;
-    vuint16m8_t v_row0_sum, v_row1_sum, v_sum;
-    vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32;
-    size_t vl = __riscv_vsetvl_e32m4(w);
-    __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0,
-                               stride_byte, vl);
-    __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1,
-                               stride_byte, vl);
-    v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32);
-    v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32);
-    v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32);
-    v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32);
-    v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4);
-    v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4);
-    v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
-    // Use round-to-nearest-up mode for vnclip
-    v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4);
-    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
-    w -= vl;
-    src0 += vl * src_stepx;
-    src1 += vl * src_stepx;
-    dst_argb += vl * 4;
-  } while (w > 0);
-}
 #endif
 #endif
 
@@ -361,7 +263,7 @@ void ScaleRowDown2_RVV(const uint8_t* src_ptr,
 #endif
 
 #ifdef HAS_SCALEROWDOWN2LINEAR_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst,
@@ -380,33 +282,11 @@ void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
     dst += vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
-                             ptrdiff_t src_stride,
-                             uint8_t* dst,
-                             int dst_width) {
-  size_t w = (size_t)dst_width;
-  (void)src_stride;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m4_t v_s0, v_s1, v_dst;
-    size_t vl = __riscv_vsetvl_e8m4(w);
-    __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl);
-    // Use round-to-nearest-up mode for averaging add
-    v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl);
-    __riscv_vse8_v_u8m4(dst, v_dst, vl);
-    w -= vl;
-    src_ptr += 2 * vl;
-    dst += vl;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN2BOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
@@ -434,41 +314,10 @@ void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
     dst += vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
-                          ptrdiff_t src_stride,
-                          uint8_t* dst,
-                          int dst_width) {
-  const uint8_t* s = src_ptr;
-  const uint8_t* t = src_ptr + src_stride;
-  size_t w = (size_t)dst_width;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    size_t vl = __riscv_vsetvl_e8m4(w);
-    vuint8m4_t v_s0, v_s1, v_t0, v_t1;
-    vuint16m8_t v_s01, v_t01, v_st01;
-    vuint8m4_t v_dst;
-    __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl);
-    __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl);
-    v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl);
-    v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl);
-    v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl);
-    // Use round-to-nearest-up mode for vnclip
-    v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl);
-    __riscv_vse8_v_u8m4(dst, v_dst, vl);
-    w -= vl;
-    s += 2 * vl;
-    t += 2 * vl;
-    dst += vl;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN4_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowDown4_RVV(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst_ptr,
@@ -485,28 +334,10 @@ void ScaleRowDown4_RVV(const uint8_t* src_ptr,
     dst_ptr += vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown4_RVV(const uint8_t* src_ptr,
-                       ptrdiff_t src_stride,
-                       uint8_t* dst_ptr,
-                       int dst_width) {
-  size_t w = (size_t)dst_width;
-  (void)src_stride;
-  do {
-    size_t vl = __riscv_vsetvl_e8m2(w);
-    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
-    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
-    __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl);
-    w -= vl;
-    src_ptr += (4 * vl);
-    dst_ptr += vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN4BOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst_ptr,
@@ -566,72 +397,10 @@ void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
     dst_ptr += vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
-                          ptrdiff_t src_stride,
-                          uint8_t* dst_ptr,
-                          int dst_width) {
-  const uint8_t* src_ptr1 = src_ptr + src_stride;
-  const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
-  const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
-  size_t w = (size_t)dst_width;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
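// The 4x4 box filter whose fallback is deleted below sums 16 source bytes
// per output byte and narrows with vnclipu by 4; under round-to-nearest-up
// that is (sum + 8) >> 4, so an all-255 block yields (4080 + 8) >> 4 = 255
// with no drift.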
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
-    vuint8m2_t v_t0, v_t1, v_t2, v_t3;
-    vuint8m2_t v_u0, v_u1, v_u2, v_u3;
-    vuint8m2_t v_v0, v_v1, v_v2, v_v3;
-    vuint16m4_t v_s01, v_s23, v_t01, v_t23;
-    vuint16m4_t v_u01, v_u23, v_v01, v_v23;
-    vuint16m4_t v_st01, v_st23, v_uv01, v_uv23;
-    vuint16m4_t v_st0123, v_uv0123, v_stuv0123;
-    vuint8m2_t v_dst;
-    size_t vl = __riscv_vsetvl_e8m2(w);
-
-    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
-    v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl);
-
-    __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl);
-    v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl);
-
-    __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl);
-    v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl);
-    v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl);
-
-    v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl);
-    v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl);
-    v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl);
-    v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl);
-
-    __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl);
-
-    v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl);
-    v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl);
-
-    v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl);
-    v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl);
-
-    v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl);
-    v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl);
-    v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl);
-    // Use round-to-nearest-up mode for vnclip
-    v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl);
-    __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl);
-    w -= vl;
-    src_ptr += 4 * vl;
-    src_ptr1 += 4 * vl;
-    src_ptr2 += 4 * vl;
-    src_ptr3 += 4 * vl;
-    dst_ptr += vl;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN34_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowDown34_RVV(const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint8_t* dst_ptr,
@@ -650,27 +419,10 @@ void ScaleRowDown34_RVV(const uint8_t* src_ptr,
     dst_ptr += 3 * vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown34_RVV(const uint8_t* src_ptr,
-                        ptrdiff_t src_stride,
-                        uint8_t* dst_ptr,
-                        int dst_width) {
-  size_t w = (size_t)dst_width / 3u;
-  do {
-    size_t vl = __riscv_vsetvl_e8m2(w);
-    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
-    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
-    __riscv_vsseg3e8_v_u8m2(dst_ptr, v_s0, v_s1, v_s3, vl);
-    w -= vl;
-    src_ptr += 4 * vl;
-    dst_ptr += 3 * vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN34_0_BOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst_ptr,
@@ -737,77 +489,11 @@ void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
     dst_ptr += 3 * vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
-                              ptrdiff_t src_stride,
-                              uint8_t* dst_ptr,
-                              int dst_width) {
-  size_t w = (size_t)dst_width / 3u;
-  const uint8_t* s = src_ptr;
-  const uint8_t* t = src_ptr + src_stride;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
-    vuint16m4_t v_t0_u16, v_t1_u16, v_t2_u16, v_t3_u16;
-    vuint8m2_t v_u0, v_u1, v_u2, v_u3;
-    vuint16m4_t v_u1_u16;
-    vuint8m2_t v_a0, v_a1, v_a2;
-    size_t vl = __riscv_vsetvl_e8m2(w);
-    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
-
-    if (src_stride == 0) {
-      v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
-      v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
-      v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_s2, 2, vl);
-      v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_s3, 2, vl);
-    } else {
-      vuint8m2_t v_t0, v_t1, v_t2, v_t3;
-      __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
-      v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 0, vl);
-      v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 0, vl);
-      v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_t2, 0, vl);
-      v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_t3, 0, vl);
-      t += 4 * vl;
-    }
-
-    v_t0_u16 = __riscv_vwmaccu_vx_u16m4(v_t0_u16, 3, v_s0, vl);
-    v_t1_u16 = __riscv_vwmaccu_vx_u16m4(v_t1_u16, 3, v_s1, vl);
-    v_t2_u16 = __riscv_vwmaccu_vx_u16m4(v_t2_u16, 3, v_s2, vl);
-    v_t3_u16 = __riscv_vwmaccu_vx_u16m4(v_t3_u16, 3, v_s3, vl);
-
-    // Use round-to-nearest-up mode for vnclip & averaging add
-    v_u0 = __riscv_vnclipu_wx_u8m2(v_t0_u16, 2, vl);
-    v_u1 = __riscv_vnclipu_wx_u8m2(v_t1_u16, 2, vl);
-    v_u2 = __riscv_vnclipu_wx_u8m2(v_t2_u16, 2, vl);
-    v_u3 = __riscv_vnclipu_wx_u8m2(v_t3_u16, 2, vl);
-
-    // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2
-    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u1, 0, vl);
-    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u0, vl);
-    v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
-    // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1
-    v_a1 = __riscv_vaaddu_vv_u8m2(v_u1, v_u2, vl);
-
-    // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2
-    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u2, 0, vl);
-    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u3, vl);
-    v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
-    __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
-
-    w -= vl;
-    s += 4 * vl;
-    dst_ptr += 3 * vl;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN34_1_BOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst_ptr,
@@ -866,65 +552,10 @@ void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
     dst_ptr += 3 * vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
-                              ptrdiff_t src_stride,
-                              uint8_t* dst_ptr,
-                              int dst_width) {
-  size_t w = (size_t)dst_width / 3u;
-  const uint8_t* s = src_ptr;
-  const uint8_t* t = src_ptr + src_stride;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
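// The 3/4-width filters here map every 4 source samples to 3 outputs with
// weights (3,1)/4, (1,1)/2 and (1,3)/4, per the a0/a1/a2 comments below;
// e.g. src = {8, 4, 4, 8} gives a0 = (24 + 4 + 2) >> 2 = 7,
// a1 = (4 + 4 + 1) >> 1 = 4 and a2 = (4 + 24 + 2) >> 2 = 7.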
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
-    vuint8m2_t v_ave0, v_ave1, v_ave2, v_ave3;
-    vuint16m4_t v_u1_u16;
-    vuint8m2_t v_a0, v_a1, v_a2;
-    size_t vl = __riscv_vsetvl_e8m2(w);
-    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
-
-    // Use round-to-nearest-up mode for vnclip & averaging add
-    if (src_stride == 0) {
-      v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_s0, vl);
-      v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_s1, vl);
-      v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_s2, vl);
-      v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_s3, vl);
-    } else {
-      vuint8m2_t v_t0, v_t1, v_t2, v_t3;
-      __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
-      v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_t0, vl);
-      v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_t1, vl);
-      v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_t2, vl);
-      v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_t3, vl);
-      t += 4 * vl;
-    }
-    // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2
-    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave1, 0, vl);
-    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave0, vl);
-    v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
-    // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1
-    v_a1 = __riscv_vaaddu_vv_u8m2(v_ave1, v_ave2, vl);
-
-    // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2
-    v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave2, 0, vl);
-    v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave3, vl);
-    v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
-    __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
-
-    w -= vl;
-    s += 4 * vl;
-    dst_ptr += 3 * vl;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN38_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowDown38_RVV(const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint8_t* dst_ptr,
@@ -945,30 +576,9 @@ void ScaleRowDown38_RVV(const uint8_t* src_ptr,
     dst_ptr += 3 * vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown38_RVV(const uint8_t* src_ptr,
-                        ptrdiff_t src_stride,
-                        uint8_t* dst_ptr,
-                        int dst_width) {
-  size_t w = (size_t)dst_width / 3u;
-  (void)src_stride;
-  assert(dst_width % 3 == 0);
-  do {
-    vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
-    size_t vl = __riscv_vsetvl_e8m1(w);
-    __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
-                            &v_s7, src_ptr, vl);
-    __riscv_vsseg3e8_v_u8m1(dst_ptr, v_s0, v_s3, v_s6, vl);
-    w -= vl;
-    src_ptr += 8 * vl;
-    dst_ptr += 3 * vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN38_2_BOX_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst_ptr,
@@ -1035,67 +645,9 @@ void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
     dst_ptr += 3 * vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
-                              ptrdiff_t src_stride,
-                              uint8_t* dst_ptr,
-                              int dst_width) {
-  size_t w = (size_t)dst_width / 3u;
-  const uint16_t coeff_a = (65536u / 6u);
-  const uint16_t coeff_b = (65536u / 4u);
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  do {
-    vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
-    vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
-    vuint16m2_t v_e0, v_e1, v_e2, v_e;
-    vuint16m2_t v_f0, v_f1, v_f2, v_f;
-    vuint16m2_t v_g0, v_g1, v_g;
-    vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
-    size_t vl = __riscv_vsetvl_e8m1(w);
-    // s: e00, e10, e20, f00, f10, f20, g00, g10
-    // t: e01, e11, e21, f01, f11, f21, g01, g11
-    __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
-                            &v_s7, src_ptr, vl);
-    __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
-                            &v_t7, src_ptr + src_stride, vl);
-    // Calculate sum of [e00, e21] to v_e
-    // Calculate sum of [f00, f21] to v_f
-    // Calculate sum of [g00, g11] to v_g
-    v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
-    v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
-    v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
-    v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
-    v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
-    v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
-    v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
-    v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
-
-    v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
-    v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
-    v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
-    v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
-    v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
-
-    // Average in 16-bit fixed-point
-    v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
-    v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
-    v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
-
-    v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);
-    v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
-    v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
-
-    __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
-    w -= vl;
-    src_ptr += 8 * vl;
-    dst_ptr += 3 * vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEROWDOWN38_3_BOX_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst_ptr,
@@ -1186,78 +738,6 @@ void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
     dst_ptr += 3 * vl;
   } while (w > 0);
 }
-#else
-void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
-                              ptrdiff_t src_stride,
-                              uint8_t* dst_ptr,
-                              int dst_width) {
-  size_t w = (size_t)dst_width / 3u;
-  const uint16_t coeff_a = (65536u / 9u);
-  const uint16_t coeff_b = (65536u / 6u);
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  do {
-    vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
-    vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
-    vuint8m1_t v_u0, v_u1, v_u2, v_u3, v_u4, v_u5, v_u6, v_u7;
-    vuint16m2_t v_e0, v_e1, v_e2, v_e3, v_e4, v_e;
-    vuint16m2_t v_f0, v_f1, v_f2, v_f3, v_f4, v_f;
-    vuint16m2_t v_g0, v_g1, v_g2, v_g;
-    vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
-    size_t vl = __riscv_vsetvl_e8m1(w);
-    // s: e00, e10, e20, f00, f10, f20, g00, g10
-    // t: e01, e11, e21, f01, f11, f21, g01, g11
-    // u: e02, e12, e22, f02, f12, f22, g02, g12
-    __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
-                            &v_s7, src_ptr, vl);
-    __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
-                            &v_t7, src_ptr + src_stride, vl);
-    __riscv_vlseg8e8_v_u8m1(&v_u0, &v_u1, &v_u2, &v_u3, &v_u4, &v_u5, &v_u6,
-                            &v_u7, src_ptr + 2 * src_stride, vl);
-    // Calculate sum of [e00, e22]
-    v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
-    v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
-    v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
-    v_e3 = __riscv_vwaddu_vv_u16m2(v_u0, v_u1, vl);
-    v_e4 = __riscv_vwaddu_vx_u16m2(v_u2, 0, vl);
-
-    v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
-    v_e2 = __riscv_vadd_vv_u16m2(v_e2, v_e3, vl);
-    v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e4, vl);
-    v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
-    // Calculate sum of [f00, f22]
-    v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
-    v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
-    v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
-    v_f3 = __riscv_vwaddu_vv_u16m2(v_u3, v_u4, vl);
-    v_f4 = __riscv_vwaddu_vx_u16m2(v_u5, 0, vl);
-
-    v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
-    v_f2 = __riscv_vadd_vv_u16m2(v_f2, v_f3, vl);
-    v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f4, vl);
-    v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
-    // Calculate sum of [g00, g12]
-    v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
-    v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
-    v_g2 = __riscv_vwaddu_vv_u16m2(v_u6, v_u7, vl);
-
-    v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
-    v_g = __riscv_vadd_vv_u16m2(v_g, v_g2, vl);
-
-    // Average in 16-bit fixed-point
-    v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
-    v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
-    v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
-
-    v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);
-    v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
-    v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
-    __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
-    w -= vl;
-    src_ptr += 8 * vl;
-    dst_ptr += 3 * vl;
-  } while (w > 0);
-}
-#endif
 #endif
 
 // ScaleUVRowUp2_(Bi)linear_RVV function is equal to other platforms'
@@ -1265,7 +745,6 @@ void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
 // platforms only implement non-edge part of image and process edge with scalar.
 
 #ifdef HAS_SCALEROWUP2_LINEAR_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             int dst_width) {
@@ -1301,45 +780,9 @@ void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
   }
   dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2];
 }
-#else
-void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
-                            uint8_t* dst_ptr,
-                            int dst_width) {
-  size_t work_width = (size_t)dst_width - 1u;
-  size_t src_width = work_width >> 1u;
-  const uint8_t* work_src_ptr = src_ptr;
-  uint8_t* work_dst_ptr = dst_ptr + 1;
-  size_t vl = __riscv_vsetvlmax_e8m4();
-  vuint8m4_t v_3 = __riscv_vmv_v_x_u8m4(3, vl);
-  dst_ptr[0] = src_ptr[0];
-  while (src_width > 0) {
-    vuint8m4_t v_src0, v_src1, v_dst_odd, v_dst_even;
-    vuint16m8_t v_src0_u16, v_src1_u16;
-    size_t vl = __riscv_vsetvl_e8m4(src_width);
-    v_src0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);
-    v_src1 = __riscv_vle8_v_u8m4(work_src_ptr + 1, vl);
-
-    v_src0_u16 = __riscv_vwaddu_vx_u16m8(v_src0, 2, vl);
-    v_src1_u16 = __riscv_vwaddu_vx_u16m8(v_src1, 2, vl);
-    v_src0_u16 = __riscv_vwmaccu_vv_u16m8(v_src0_u16, v_3, v_src1, vl);
-    v_src1_u16 = __riscv_vwmaccu_vv_u16m8(v_src1_u16, v_3, v_src0, vl);
-
-    v_dst_odd = __riscv_vnsrl_wx_u8m4(v_src0_u16, 2, vl);
-    v_dst_even = __riscv_vnsrl_wx_u8m4(v_src1_u16, 2, vl);
-
-    __riscv_vsseg2e8_v_u8m4(work_dst_ptr, v_dst_even, v_dst_odd, vl);
-
-    src_width -= vl;
-    work_src_ptr += vl;
-    work_dst_ptr += 2 * vl;
-  }
-  dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2];
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEROWUP2_BILINEAR_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint8_t* dst_ptr,
@@ -1411,77 +854,6 @@ void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
   e[dst_width - 1] =
       (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2;
 }
-#else
-void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
-                              ptrdiff_t src_stride,
-                              uint8_t* dst_ptr,
-                              ptrdiff_t dst_stride,
-                              int dst_width) {
-  size_t work_width = ((size_t)dst_width - 1u) & ~1u;
-  size_t src_width = work_width >> 1u;
-  const uint8_t* work_s = src_ptr;
-  const uint8_t* work_t = src_ptr + src_stride;
-  const uint8_t* s = work_s;
-  const uint8_t* t = work_t;
-  uint8_t* d = dst_ptr;
-  uint8_t* e = dst_ptr + dst_stride;
-  uint8_t* work_d = d + 1;
-  uint8_t* work_e = e + 1;
-  size_t vl = __riscv_vsetvlmax_e16m4();
-  vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
-  vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
-  d[0] = (3 * s[0] + t[0] + 2) >> 2;
-  e[0] = (s[0] + 3 * t[0] + 2) >> 2;
-  while (src_width > 0) {
-    vuint8m2_t v_s0, v_s1, v_t0, v_t1;
-    vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
-    vuint16m4_t v_t0_u16_, v_t1_u16_;
-    vuint8m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
-    size_t vl = __riscv_vsetvl_e8m2(src_width);
-    v_s0 = __riscv_vle8_v_u8m2(work_s, vl);
-    v_s1 = __riscv_vle8_v_u8m2(work_s + 1, vl);
-
-    v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
-    v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
-    v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);
-    v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);
-
-    v_t0 = __riscv_vle8_v_u8m2(work_t, vl);
-    v_t1 = __riscv_vle8_v_u8m2(work_t + 1, vl);
-
-    v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
-    v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
-    v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);
-    v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);
-
-    v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);
-    v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
-
-    v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);
-    v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
-    v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);
-    v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
-
-    v_dst0_odd = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);
-    v_dst0_even = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
-    v_dst1_odd = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
-    v_dst1_even = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
-
-    __riscv_vsseg2e8_v_u8m2(work_d, v_dst0_even, v_dst0_odd, vl);
-    __riscv_vsseg2e8_v_u8m2(work_e, v_dst1_even, v_dst1_odd, vl);
-
-    src_width -= vl;
-    work_s += vl;
-    work_t += vl;
-    work_d += 2 * vl;
-    work_e += 2 * vl;
-  }
-  d[dst_width - 1] =
-      (3 * s[(dst_width - 1) / 2] + t[(dst_width - 1) / 2] + 2) >> 2;
-  e[dst_width - 1] =
-      (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2;
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEUVROWDOWN2_RVV
@@ -1506,7 +878,7 @@ void ScaleUVRowDown2_RVV(const uint8_t* src_uv,
 #endif
 
 #ifdef HAS_SCALEUVROWDOWN2LINEAR_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,
                                ptrdiff_t src_stride,
                                uint8_t* dst_uv,
@@ -1529,37 +901,11 @@ void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,
     dst_uv += vl * 2;
   } while (w > 0);
 }
-#else
-void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,
-                               ptrdiff_t src_stride,
-                               uint8_t* dst_uv,
-                               int dst_width) {
-  size_t w = (size_t)dst_width;
-  const uint16_t* src = (const uint16_t*)src_uv;
-  (void)src_stride;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m4_t v_u0v0, v_u1v1, v_avg;
-    vuint16m4_t v_u0v0_16, v_u1v1_16;
-    size_t vl = __riscv_vsetvl_e16m4(w);
-    __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl);
-    v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16);
-    v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16);
-    // Use round-to-nearest-up mode for averaging add
-    v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2);
-    __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2);
-    w -= vl;
-    src += vl * 2;
-    dst_uv += vl * 2;
-  } while (w > 0);
-}
 #endif
 #endif
 
 #ifdef HAS_SCALEUVROWDOWN2BOX_RVV
-#if defined(LIBYUV_RVV_HAS_TUPLE_TYPE) && defined(LIBYUV_RVV_HAS_VXRM_ARG)
+#ifdef LIBYUV_RVV_HAS_VXRM_ARG
 void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,
                             ptrdiff_t src_stride,
                             uint8_t* dst_uv,
@@ -1599,48 +945,6 @@ void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,
     src_uv_row1 += 4 * vl;
   } while (w > 0);
 }
-#else
-void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,
-                            ptrdiff_t src_stride,
-                            uint8_t* dst_uv,
-                            int dst_width) {
-  const uint8_t* src_uv_row1 = src_uv + src_stride;
-  size_t w = (size_t)dst_width;
-  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
-  // register) is set to round-to-nearest-up mode(0).
-  asm volatile("csrwi vxrm, 0");
-  do {
-    vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0;
-    vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1;
-    vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1;
-    vuint16m4_t v_sum0, v_sum1;
-    vuint8m2_t v_dst_u, v_dst_v;
-    size_t vl = __riscv_vsetvl_e8m2(w);
-
-    __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0,
-                            src_uv, vl);
-    __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1,
-                            src_uv_row1, vl);
-
-    v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl);
-    v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl);
-    v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl);
-    v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl);
-
-    v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl);
-    v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl);
-    // Use round-to-nearest-up mode for vnclip
-    v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl);
-    v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl);
-
-    __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl);
-
-    dst_uv += 2 * vl;
-    src_uv += 4 * vl;
-    w -= vl;
-    src_uv_row1 += 4 * vl;
-  } while (w > 0);
-}
 #endif
 #endif
 
@@ -1699,7 +1003,6 @@ void ScaleUVRowDownEven_RVV(const uint8_t* src_uv,
 // scalar.
 
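The 2x upsamplers guarded below all use the same 3:1 tap pattern; a scalar
sketch of one linear output pair, with illustrative names not taken from the
sources:

#include <stdint.h>

// Each output leans 3:1 toward its nearer source sample; the +2 rounds
// before the >> 2.
static inline void Up2LinearPair(uint8_t s0, uint8_t s1,
                                 uint8_t* d_even, uint8_t* d_odd) {
  *d_even = (uint8_t)((3 * s0 + s1 + 2) >> 2);
  *d_odd = (uint8_t)((s0 + 3 * s1 + 2) >> 2);
}

The bilinear variants apply the same 3:1 weighting across the two source rows
as well (taps 9, 3, 3, 1 with a final >> 4), which is why the edge pixels in
the retained code are computed as (3 * s[...] + t[...] + 2) >> 2.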
 #ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
                               uint8_t* dst_ptr,
                               int dst_width) {
@@ -1741,51 +1044,9 @@ void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
   dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2];
   dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1];
 }
-#else
-void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
-                              uint8_t* dst_ptr,
-                              int dst_width) {
-  size_t work_width = ((size_t)dst_width - 1u) & ~1u;
-  uint16_t* work_dst_ptr = (uint16_t*)dst_ptr + 1;
-  const uint8_t* work_src_ptr = src_ptr;
-  size_t vl = __riscv_vsetvlmax_e8m4();
-  vuint8m4_t v_3_u8 = __riscv_vmv_v_x_u8m4(3, vl);
-  dst_ptr[0] = src_ptr[0];
-  dst_ptr[1] = src_ptr[1];
-  while (work_width > 0) {
-    vuint8m4_t v_uv0, v_uv1, v_dst_odd_u8, v_dst_even_u8;
-    vuint16m4_t v_dst_odd, v_dst_even;
-    vuint16m8_t v_uv0_u16, v_uv1_u16;
-    size_t vl = __riscv_vsetvl_e8m4(work_width);
-    v_uv0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);
-    v_uv1 = __riscv_vle8_v_u8m4(work_src_ptr + 2, vl);
-
-    v_uv0_u16 = __riscv_vwaddu_vx_u16m8(v_uv0, 2, vl);
-    v_uv1_u16 = __riscv_vwaddu_vx_u16m8(v_uv1, 2, vl);
-
-    v_uv0_u16 = __riscv_vwmaccu_vv_u16m8(v_uv0_u16, v_3_u8, v_uv1, vl);
-    v_uv1_u16 = __riscv_vwmaccu_vv_u16m8(v_uv1_u16, v_3_u8, v_uv0, vl);
-
-    v_dst_odd_u8 = __riscv_vnsrl_wx_u8m4(v_uv0_u16, 2, vl);
-    v_dst_even_u8 = __riscv_vnsrl_wx_u8m4(v_uv1_u16, 2, vl);
-
-    v_dst_even = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_even_u8);
-    v_dst_odd = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_odd_u8);
-
-    __riscv_vsseg2e16_v_u16m4(work_dst_ptr, v_dst_even, v_dst_odd, vl / 2);
-
-    work_width -= vl;
-    work_src_ptr += vl;
-    work_dst_ptr += vl;
-  }
-  dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2];
-  dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1];
-}
-#endif
 #endif
 
 #ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
-#ifdef LIBYUV_RVV_HAS_TUPLE_TYPE
 void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst_ptr,
@@ -1873,92 +1134,6 @@ void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
       (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >>
       2;
 }
-#else
-void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8_t* dst_ptr,
-                                ptrdiff_t dst_stride,
-                                int dst_width) {
-  size_t work_width = ((size_t)dst_width - 1u) & ~1u;
-  const uint8_t* work_s = src_ptr;
-  const uint8_t* work_t = src_ptr + src_stride;
-  const uint8_t* s = work_s;
-  const uint8_t* t = work_t;
-  uint8_t* d = dst_ptr;
-  uint8_t* e = dst_ptr + dst_stride;
-  uint16_t* work_d = (uint16_t*)d + 1;
-  uint16_t* work_e = (uint16_t*)e + 1;
-  size_t vl = __riscv_vsetvlmax_e16m4();
-  vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
-  vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
-  d[0] = (3 * s[0] + t[0] + 2) >> 2;
-  e[0] = (s[0] + 3 * t[0] + 2) >> 2;
-  d[1] = (3 * s[1] + t[1] + 2) >> 2;
-  e[1] = (s[1] + 3 * t[1] + 2) >> 2;
-  while (work_width > 0) {
-    vuint8m2_t v_s0, v_s1, v_t0, v_t1;
-    vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
-    vuint16m4_t v_t0_u16_, v_t1_u16_;
-    vuint8m2_t v_dst0_odd_u8, v_dst0_even_u8, v_dst1_odd_u8, v_dst1_even_u8;
-    vuint16m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
-    size_t vl = __riscv_vsetvl_e8m2(work_width);
-    v_s0 = __riscv_vle8_v_u8m2(work_s, vl);
-    v_s1 = __riscv_vle8_v_u8m2(work_s + 2, vl);
-
-    v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
-    v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
-    v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);
-    v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);
-
-    v_t0 = __riscv_vle8_v_u8m2(work_t, vl);
-    v_t1 = __riscv_vle8_v_u8m2(work_t + 2, vl);
-
-    v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
-    v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
-    v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);
-    v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);
-
-    v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);
-    v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
-
-    v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);
-    v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
-    v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);
-    v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
-
-    v_dst0_odd_u8 = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);
-    v_dst0_even_u8 = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
-    v_dst1_odd_u8 = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
-    v_dst1_even_u8 = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
-
-    v_dst0_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_even_u8);
-    v_dst0_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_odd_u8);
-    v_dst1_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_even_u8);
-    v_dst1_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_odd_u8);
-
-    __riscv_vsseg2e16_v_u16m2(work_d, v_dst0_even, v_dst0_odd, vl / 2);
-    __riscv_vsseg2e16_v_u16m2(work_e, v_dst1_even, v_dst1_odd, vl / 2);
-
-    work_width -= vl;
-    work_s += vl;
-    work_t += vl;
-    work_d += vl;
-    work_e += vl;
-  }
-  d[2 * dst_width - 2] =
-      (3 * s[((dst_width + 1) & ~1) - 2] + t[((dst_width + 1) & ~1) - 2] + 2) >>
-      2;
-  e[2 * dst_width - 2] =
-      (s[((dst_width + 1) & ~1) - 2] + 3 * t[((dst_width + 1) & ~1) - 2] + 2) >>
-      2;
-  d[2 * dst_width - 1] =
-      (3 * s[((dst_width + 1) & ~1) - 1] + t[((dst_width + 1) & ~1) - 1] + 2) >>
-      2;
-  e[2 * dst_width - 1] =
-      (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >>
-      2;
-}
-#endif
 #endif
 
 #ifdef __cplusplus