diff --git a/include/libyuv/row_sve.h b/include/libyuv/row_sve.h
index d631ba560..9ed0e4efb 100644
--- a/include/libyuv/row_sve.h
+++ b/include/libyuv/row_sve.h
@@ -342,6 +342,7 @@ extern "C" {
 // rather than needing a pair of shifts to saturate and then insert into the
 // correct position in the lane.
 #define STOREAR30_SVE                                                    \
+  /* Inputs: B: z16.h,  G: z17.h,  R: z18.h */                           \
   "uqshl    z16.h, p0/m, z16.h, #2            \n" /* bbbbbbbbbbxxxxxx */ \
   "uqshl    z17.h, p0/m, z17.h, #2            \n" /* ggggggggggxxxxxx */ \
   "umin     z18.h, p0/m, z18.h, z23.h         \n" /* 00rrrrrrrrrrxxxx */ \
@@ -352,6 +353,31 @@ extern "C" {
   "st2h     {z17.h, z18.h}, p1, [%[dst_ar30]] \n"                        \
   "incb     %[dst_ar30], all, mul #2          \n"
 
+#define STOREAR30_SVE_2X                                                 \
+  /* Inputs: B: z16.h,  G: z17.h,  R: z18.h */                           \
+  /*         B: z20.h,  G: z21.h,  R: z22.h */                           \
+  "uqshl    z16.h, p0/m, z16.h, #2            \n" /* bbbbbbbbbbxxxxxx */ \
+  "uqshl    z20.h, p0/m, z20.h, #2            \n" /* bbbbbbbbbbxxxxxx */ \
+  "uqshl    z17.h, p0/m, z17.h, #2            \n" /* ggggggggggxxxxxx */ \
+  "uqshl    z21.h, p0/m, z21.h, #2            \n" /* ggggggggggxxxxxx */ \
+  "umin     z18.h, p0/m, z18.h, z23.h         \n" /* 00rrrrrrrrrrxxxx */ \
+  "umin     z22.h, p0/m, z22.h, z23.h         \n" /* 00rrrrrrrrrrxxxx */ \
+  "orr      z18.h, z18.h, #0xc000             \n" /* 11rrrrrrrrrrxxxx */ \
+  "orr      z22.h, z22.h, #0xc000             \n" /* 11rrrrrrrrrrxxxx */ \
+  "sri      z18.h, z17.h, #12                 \n" /* 11rrrrrrrrrrgggg */ \
+  "sri      z22.h, z21.h, #12                 \n" /* 11rrrrrrrrrrgggg */ \
+  "lsl      z17.h, z17.h, #4                  \n" /* ggggggxxxxxx0000 */ \
+  "lsl      z19.h, z21.h, #4                  \n" /* ggggggxxxxxx0000 */ \
+  "sri      z17.h, z16.h, #6                  \n" /* ggggggbbbbbbbbbb */ \
+  "sri      z19.h, z20.h, #6                  \n" /* ggggggbbbbbbbbbb */ \
+  "zip2     z16.h, z17.h, z19.h               \n"                        \
+  "zip1     z21.h, z17.h, z19.h               \n"                        \
+  "zip2     z17.h, z18.h, z22.h               \n"                        \
+  "zip1     z22.h, z18.h, z22.h               \n"                        \
+  "st2h     {z21.h, z22.h}, p2, [%[dst_ar30]] \n"                        \
+  "st2h     {z16.h, z17.h}, p3, [%[dst_ar30], #2, mul vl] \n"            \
+  "incb     %[dst_ar30], all, mul #4          \n"
+
 #define YUVTORGB_SVE_REGS                                                     \
   "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z16", "z17", "z18", "z19", \
       "z20", "z21", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29",   \
@@ -776,32 +802,35 @@ static inline void I422ToAR30Row_SVE_SC(const uint8_t* src_y,
                                         const struct YuvConstants* yuvconstants,
                                         int width) STREAMING_COMPATIBLE {
   uint64_t vl;
-  // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
+  // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE_2X.
   const uint16_t limit = 0x3ff0;
   asm volatile(
       "cnth     %[vl]                                   \n"
       "ptrue    p0.b                                    \n"  //
       YUVTORGB_SVE_SETUP
-      "dup      z19.b, #255                             \n"  // Alpha
       "dup      z23.h, %w[limit]                        \n"
-      "subs     %w[width], %w[width], %w[vl]            \n"
+      "subs     %w[width], %w[width], %w[vl], lsl #1    \n"
       "b.le     2f                                      \n"
 
       // Run bulk of computation with an all-true predicate to avoid predicate
       // generation overhead.
-      "ptrue    p1.h                                    \n"
+      "ptrue    p1.b                                    \n"
+      "ptrue    p2.b                                    \n"
+      "ptrue    p3.b                                    \n"
       "1:                                               \n"  //
-      READYUV422_SVE I4XXTORGB_SVE STOREAR30_SVE
-      "subs     %w[width], %w[width], %w[vl]            \n"
+      READYUV422_SVE_2X I422TORGB_SVE_2X STOREAR30_SVE_2X
+      "subs     %w[width], %w[width], %w[vl], lsl #1    \n"
       "b.gt     1b                                      \n"
 
       // Calculate a predicate for the final iteration to deal with the tail.
       "2:                                               \n"
-      "adds    %w[width], %w[width], %w[vl]             \n"
+      "adds    %w[width], %w[width], %w[vl], lsl #1     \n"
       "b.eq    99f                                      \n"
 
-      "whilelt  p1.h, wzr, %w[width]                    \n"  //
-      READYUV422_SVE I4XXTORGB_SVE STOREAR30_SVE
+      "whilelt  p1.b, wzr, %w[width]                    \n"
+      "whilelt  p2.h, wzr, %w[width]                    \n"
+      "whilelt  p3.h, %w[vl], %w[width]                 \n"  //
+      READYUV422_SVE_2X I422TORGB_SVE_2X STOREAR30_SVE_2X
 
       "99:                                              \n"
       : [src_y] "+r"(src_y),                                // %[src_y]