diff --git a/README.chromium b/README.chromium index 681e9cc0f..8eecc5338 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1841 +Version: 1844 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 4025050a8..8e4562efc 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -404,6 +404,32 @@ int U444ToABGR(const uint8_t* src_y, int width, int height); +// Convert I444 to RGB24. +LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert I444 to RAW. +LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, @@ -1312,6 +1338,32 @@ int J420ToRAW(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + LIBYUV_API int I420ToRGB565(const uint8_t* src_y, int src_stride_y, @@ -1495,6 +1547,20 @@ int I444ToARGBMatrix(const uint8_t* src_y, int width, int height); +// Convert I444 to RGB24 with matrix. 
+LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert 10 bit 420 YUV to ARGB with matrix. LIBYUV_API int I010ToAR30Matrix(const uint16_t* src_y, @@ -1893,6 +1959,20 @@ int I420ToRGB24Matrix(const uint8_t* src_y, int width, int height); +// Convert I422 to RGB24 with matrix. +LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + // Convert I420 to RGB565 with specified color matrix. LIBYUV_API int I420ToRGB565Matrix(const uint8_t* src_y, @@ -1975,6 +2055,21 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y, int height, enum FilterMode filter); +// Convert I422 to RGB24 with matrix and UV filter mode. +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + // Convert I420 to RGB24 with matrix and UV filter mode. 
LIBYUV_API int I420ToRGB24MatrixFilter(const uint8_t* src_y, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 1c14ef3b4..c09d51bac 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -111,6 +111,7 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 +#define HAS_I444TORGB24ROW_SSSE3 #define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 @@ -219,6 +220,7 @@ extern "C" { #define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_I444TOARGBROW_AVX2 +#define HAS_I444TORGB24ROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 @@ -469,6 +471,7 @@ extern "C" { #define HAS_I422TOYUY2ROW_NEON #define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_I444TORGB24ROW_NEON #define HAS_INTERPOLATEROW_16_NEON #define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON @@ -903,6 +906,12 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -3183,6 +3192,12 @@ void I444ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -3377,6 +3392,18 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const 
uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void I444ToRGB24Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -3718,12 +3745,24 @@ void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -4385,6 +4424,12 @@ void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index afe00d4be..699d13f91 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1841 +#define LIBYUV_VERSION 1844 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 5500fcb28..64425c596 100644 --- 
a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -10,6 +10,8 @@ #include "libyuv/convert_argb.h" +#include <assert.h> + #include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG @@ -67,6 +69,7 @@ int I420ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -300,6 +303,7 @@ int I422ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -538,6 +542,7 @@ int I444ToARGBMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -749,6 +754,128 @@ int U444ToABGR(const uint8_t* src_y, width, height); } +// Convert I444 to RGB24 with matrix. +LIBYUV_API +int I444ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } + // Coalesce rows. 
+ if (src_stride_y == width && src_stride_u == width && src_stride_v == width && + dst_stride_rgb24 == width * 3) { + width *= height; + height = 1; + src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I444ToRGB24Row = I444ToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_LSX; + } + } +#endif + + for (y = 0; y < height; ++y) { + I444ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I444 to RGB24. +LIBYUV_API +int I444ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I444 to RAW. 
+LIBYUV_API +int I444ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I444ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert 10 bit YUV to ARGB with matrix. // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. @@ -769,6 +896,7 @@ int I010ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -928,6 +1056,7 @@ int I012ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -985,6 +1114,7 @@ int I210ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1139,6 +1269,7 @@ int I410ToAR30Matrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1192,6 +1323,7 @@ int I010ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v 
|| !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1355,6 +1487,7 @@ int I012ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I212ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1410,6 +1543,7 @@ int I210ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1570,6 +1704,7 @@ int I410ToARGBMatrix(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1619,6 +1754,7 @@ int P010ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1669,6 +1805,7 @@ int P210ToARGBMatrix(const uint16_t* src_y, void (*P210ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -1717,6 +1854,7 @@ int P010ToAR30Matrix(const uint16_t* src_y, void (*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1767,6 +1905,7 @@ int P210ToAR30Matrix(const uint16_t* src_y, void 
(*P210ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -1825,6 +1964,7 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1949,6 +2089,7 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2071,6 +2212,7 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2314,6 +2456,7 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2414,6 +2557,7 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, int width) = I210AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2512,6 +2656,7 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, int width) = 
I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -2599,6 +2744,7 @@ int I400ToARGBMatrix(const uint8_t* src_y, void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I400ToARGBRow_C; + assert(yuvconstants); if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3516,6 +3662,7 @@ int NV12ToARGBMatrix(const uint8_t* src_y, void (*NV12ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3600,6 +3747,7 @@ int NV21ToARGBMatrix(const uint8_t* src_y, void (*NV21ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -3743,6 +3891,7 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, void (*NV12ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -3803,6 +3952,7 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, void (*NV21ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4145,6 +4295,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y, const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int 
halfheight = (height + 1) >> 1; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -4245,6 +4396,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4356,6 +4508,7 @@ int NV12ToRGB565Matrix(const uint8_t* src_y, void (*NV12ToRGB565Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -4458,6 +4611,7 @@ int I420ToRGBAMatrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } @@ -4574,6 +4728,7 @@ int I420ToRGB24Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } @@ -4744,6 +4899,121 @@ int H420ToRAW(const uint8_t* src_y, width, height); } +// Convert I422 to RGB24 with matrix. 
+LIBYUV_API +int I422ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB24Row_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I422TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToRGB24Row = I422ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToRGB24Row = I422ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB24Row = I422ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_MSA; + } + } +#endif +#if defined(HAS_I422TORGB24ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToRGB24Row = I422ToRGB24Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToRGB24Row = I422ToRGB24Row_LASX; + } + } +#endif + + for (y = 0; y < height; ++y) { + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v 
+= src_stride_v; + } + return 0; +} + +// Convert I422 to RGB24. +LIBYUV_API +int I422ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); +} + +// Convert I422 to RAW. +LIBYUV_API +int I422ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I422ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + // Convert I420 to ARGB1555. LIBYUV_API int I420ToARGB1555(const uint8_t* src_y, @@ -4924,6 +5194,7 @@ int I420ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5055,6 +5326,7 @@ int I422ToRGB565Matrix(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -5298,6 +5570,7 @@ int I420ToAR30Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToAR30Row_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5421,9 +5694,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int 
width) = I444ToARGBRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5474,27 +5750,31 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -5506,15 +5786,15 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, 
row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5526,8 +5806,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -5551,8 +5831,9 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -5604,22 +5885,22 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = 
ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -5630,8 +5911,8 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -5655,15 +5936,16 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, int width, int height) { int y; - void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, - const uint8_t* v_buf, uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - I444ToARGBRow_C; - void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = - ARGBToRGB24Row_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { 
return -1; } @@ -5673,141 +5955,96 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } -#if defined(HAS_I444TOARGBROW_SSSE3) +#if defined(HAS_I444TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - I444ToARGBRow = I444ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_SSSE3; + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; } } #endif -#if defined(HAS_I444TOARGBROW_AVX2) +#if defined(HAS_I444TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - I444ToARGBRow = I444ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I444ToARGBRow = I444ToARGBRow_AVX2; + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; } } #endif -#if defined(HAS_I444TOARGBROW_NEON) +#if defined(HAS_I444TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - I444ToARGBRow = I444ToARGBRow_Any_NEON; + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_NEON; + I444ToRGB24Row = I444ToRGB24Row_NEON; } } #endif -#if defined(HAS_I444TOARGBROW_MSA) +#if defined(HAS_I444TORGB24ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; + I444ToRGB24Row = I444ToRGB24Row_Any_MSA; if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; + I444ToRGB24Row = I444ToRGB24Row_MSA; } } #endif -#if defined(HAS_I444TOARGBROW_LASX) +#if defined(HAS_I444TORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { - I444ToARGBRow = I444ToARGBRow_Any_LASX; + I444ToRGB24Row = I444ToRGB24Row_Any_LASX; if (IS_ALIGNED(width, 32)) { - I444ToARGBRow = I444ToARGBRow_LASX; + I444ToRGB24Row = I444ToRGB24Row_LASX; } } #endif -#if defined(HAS_ARGBTORGB24ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = 
ARGBToRGB24Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_AVX2; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) - if (TestCpuFlag(kCpuHasAVX512VBMI)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_MSA; - } - } -#endif -#if defined(HAS_ARGBTORGB24ROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToRGB24Row = ARGBToRGB24Row_LASX; - } - } -#endif -// TODO: Fix HAS macros to match function names -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if 
defined(HAS_SCALEROWUP2_LINEAR_NEON) + +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif // alloc 4 lines temp const int row_size = (width + 31) & ~31; - align_buffer_64(row, row_size * 8); + align_buffer_64(row, row_size * 4); uint8_t* temp_u_1 = row; uint8_t* temp_u_2 = row + row_size; uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - uint8_t* temp_argb = row + row_size * 4; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; - I444ToARGBRow(src_y, temp_u_2, temp_v_2, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + I444ToRGB24Row(src_y, temp_u_2, temp_v_2, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; src_u += src_stride_u; @@ -5815,10 +6052,9 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, } if (!(height & 1)) { - 
Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); - I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width); - ARGBToRGB24Row(temp_argb, dst_rgb24, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); + I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width); } free_aligned_buffer_64(row); @@ -5841,9 +6077,12 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5870,21 +6109,24 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if 
(TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -5896,15 +6138,15 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -5916,8 +6158,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); } @@ -5942,8 +6184,9 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); 
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -5972,17 +6215,17 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -5993,8 +6236,8 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -6021,9 +6264,12 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6050,21 +6296,24 @@ static int 
I010ToARGBMatrixBilinear(const uint16_t* src_y, } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6076,15 +6325,15 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6096,8 +6345,8 @@ static int I010ToARGBMatrixBilinear(const 
uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); } @@ -6121,8 +6370,9 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6151,17 +6401,17 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6172,8 +6422,8 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear_12(src_u, temp_u, width); + ScaleRowUp2_Linear_12(src_v, temp_v, width); I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6208,9 +6458,12 @@ static int 
I420AlphaToARGBMatrixBilinear( int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_Any_C; + void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, ptrdiff_t dst_stride, + int dst_width) = ScaleRowUp2_Bilinear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6293,27 +6546,32 @@ static int I420AlphaToARGBMatrixBilinear( } } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + +#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) +#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; + Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -6325,8 +6583,8 @@ static int 
I420AlphaToARGBMatrixBilinear( uint8_t* temp_v_1 = row + row_size * 2; uint8_t* temp_v_2 = row + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6337,8 +6595,8 @@ static int I420AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6360,8 +6618,8 @@ static int I420AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear(src_u, temp_u_1, width); + ScaleRowUp2_Linear(src_v, temp_v_1, width); I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6395,8 +6653,9 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, int width) = I444AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = - ScaleRowUp2_Linear_Any_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6481,22 +6740,22 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSE2) if 
(TestCpuFlag(kCpuHasSSE2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; } #endif @@ -6507,8 +6766,8 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, uint8_t* temp_v = row + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6548,9 +6807,12 @@ static int I010AlphaToARGBMatrixBilinear( int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_12)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C; + void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6610,21 +6872,24 @@ static int I010AlphaToARGBMatrixBilinear( } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) +#if 
defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2; } #endif -#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) +#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; + Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON; + ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6636,8 +6901,8 @@ static int I010AlphaToARGBMatrixBilinear( uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3; - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6648,8 +6913,8 @@ static int I010AlphaToARGBMatrixBilinear( src_a += src_stride_a; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width); - Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width); + Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width); + Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6671,8 +6936,8 @@ static int I010AlphaToARGBMatrixBilinear( } if (!(height & 1)) { - Scale2RowUp(src_u, 0, temp_u_1, row_size, width); - 
Scale2RowUp(src_v, 0, temp_v_1, row_size, width); + ScaleRowUp2_Linear_12(src_u, temp_u_1, width); + ScaleRowUp2_Linear_12(src_v, temp_v_1, width); I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6706,8 +6971,9 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, int width) = I410AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, - int dst_width) = ScaleRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_ptr, uint16_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || height == 0) { return -1; @@ -6769,17 +7035,17 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_SSSE3; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_AVX2; } #endif #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; + ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_NEON; } #endif @@ -6790,8 +7056,8 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_v = (uint16_t*)(row) + row_size; for (y = 0; y < height; ++y) { - ScaleRowUp(src_u, temp_u, width); - ScaleRowUp(src_v, temp_v, width); + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { @@ -6820,9 +7086,10 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, void 
(*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6851,19 +7118,19 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif @@ -6873,13 +7140,13 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ 
-6890,7 +7157,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); } @@ -6911,8 +7178,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, void (*P410ToARGBRow)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -6941,19 +7209,19 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif @@ -6962,7 +7230,7 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; @@ -6986,9 +7254,10 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* 
uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = - ScaleUVRowUp2_Bilinear_16_Any_C; + void (*Scale2RowUp_Bilinear_16)( + const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, + ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -7017,19 +7286,19 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; + Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON; } #endif @@ -7039,13 +7308,13 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size; - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; for (y = 0; y < height - 2; y += 2) { - Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -7056,7 +7325,7 @@ static int P010ToAR30MatrixBilinear(const 
uint16_t* src_y, } if (!(height & 1)) { - Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width); + Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); } @@ -7077,8 +7346,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, void (*P410ToAR30Row)( const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C; - void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = - ScaleUVRowUp2_Linear_16_Any_C; + void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, + int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; + assert(yuvconstants); if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } @@ -7107,19 +7377,19 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 if (TestCpuFlag(kCpuHasSSE41)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2; } #endif #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; + ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON; } #endif @@ -7128,7 +7398,7 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, uint16_t* temp_uv = (uint16_t*)(row); for (y = 0; y < height; ++y) { - ScaleRowUp(src_uv, temp_uv, width); + ScaleRowUp2_Linear(src_uv, temp_uv, width); P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; @@ -7139,6 +7409,128 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, return 0; } +static int I422ToRGB24MatrixLinear(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int 
src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToRGB24Row_C; + void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, + int dst_width) = ScaleRowUp2_Linear_Any_C; + assert(yuvconstants); + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_I444TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + I444ToRGB24Row = I444ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToRGB24Row = I444ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I444ToRGB24Row = I444ToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_I444TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToRGB24Row = I444ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToRGB24Row = I444ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2; + } +#endif +#if defined(HAS_SCALEROWUP2_LINEAR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON; + } +#endif + + // alloc 2 lines temp + 
const int row_size = (width + 31) & ~31; + align_buffer_64(row, row_size * 2); + uint8_t* temp_u = row; + uint8_t* temp_v = row + row_size; + + for (y = 0; y < height; ++y) { + ScaleRowUp2_Linear(src_u, temp_u, width); + ScaleRowUp2_Linear(src_v, temp_v, width); + I444ToRGB24Row(src_y, temp_u, temp_v, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + + free_aligned_buffer_64(row); + return 0; +} + +LIBYUV_API +int I422ToRGB24MatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter) { + switch (filter) { + case kFilterNone: + return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + yuvconstants, width, height); + case kFilterBilinear: + case kFilterBox: + case kFilterLinear: + return I422ToRGB24MatrixLinear( + src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + } + + return -1; +} + LIBYUV_API int I420ToARGBMatrixFilter(const uint8_t* src_y, int src_stride_y, @@ -7217,14 +7609,12 @@ int I420ToRGB24MatrixFilter(const uint8_t* src_y, return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420ToRGB24MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, yuvconstants, width, height); - case kFilterLinear: - // TODO: Implement Linear using Bilinear with Scale2RowUp stride 0 - return -1; } return -1; @@ -7248,13 +7638,12 @@ int I010ToAR30MatrixFilter(const 
uint16_t* src_y, return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToAR30MatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7307,13 +7696,12 @@ int I010ToARGBMatrixFilter(const uint16_t* src_y, return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010ToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7370,14 +7758,13 @@ int I420AlphaToARGBMatrixFilter(const uint8_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I420AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7439,14 +7826,13 @@ int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height, attenuate); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return I010AlphaToARGBMatrixBilinear( src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, 
dst_stride_argb, yuvconstants, width, height, attenuate); - case kFilterLinear: - return -1; } return -1; @@ -7486,6 +7872,8 @@ int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, return -1; } +// TODO(fb): Verify this function works correctly. P010 is like NV12 but 10 bit +// UV is biplanar. LIBYUV_API int P010ToARGBMatrixFilter(const uint16_t* src_y, int src_stride_y, @@ -7502,13 +7890,12 @@ int P010ToARGBMatrixFilter(const uint16_t* src_y, return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; @@ -7557,13 +7944,12 @@ int P010ToAR30MatrixFilter(const uint16_t* src_y, return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); + case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0 case kFilterBilinear: case kFilterBox: return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv, src_stride_uv, dst_ar30, dst_stride_ar30, yuvconstants, width, height); - case kFilterLinear: - return -1; } return -1; diff --git a/source/row_any.cc b/source/row_any.cc index bd46ba1b5..d6149ee52 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -359,6 +359,9 @@ ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15) #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) #endif +#ifdef HAS_I444TORGB24ROW_SSSE3 +ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15) +#endif #ifdef HAS_I422TORGB24ROW_AVX2 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) #endif @@ -374,6 +377,9 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #ifdef 
HAS_I444TOARGBROW_AVX2 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif +#ifdef HAS_I444TORGB24ROW_AVX2 +ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31) +#endif #ifdef HAS_I422TOARGB4444ROW_AVX2 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15) #endif @@ -383,6 +389,9 @@ ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15) #ifdef HAS_I422TORGB565ROW_AVX2 ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15) #endif +#ifdef HAS_I444TORGB24ROW_NEON +ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7) +#endif #ifdef HAS_I422TOARGBROW_NEON ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) diff --git a/source/row_common.cc b/source/row_common.cc index 4e1141f70..2531c85b8 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1863,6 +1863,23 @@ void I444ToARGBRow_C(const uint8_t* src_y, } } +void I444ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width; ++x) { + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + src_y += 1; + src_u += 1; + src_v += 1; + rgb_buf += 3; // Advance 1 pixel. + } +} + // Also used for 420 void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, @@ -4061,6 +4078,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, } #endif +#if defined(HAS_I444TORGB24ROW_AVX2) +void I444ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif + src_y += twidth; + src_u += twidth; + src_v += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 3bda44821..af92e3f02 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2582,6 +2582,20 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \ "lea 0x20(%[dst_rgba]),%[dst_rgba] \n" +// Store 8 RGB24 values. +#define STORERGB24 \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklbw %%xmm2,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm1 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm1 \n" \ + "pshufb %%xmm5,%%xmm0 \n" \ + "pshufb %%xmm6,%%xmm1 \n" \ + "palignr $0xc,%%xmm0,%%xmm1 \n" \ + "movq %%xmm0,(%[dst_rgb24]) \n" \ + "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" \ + "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" + // Store 8 AR30 values. 
#define STOREAR30 \ "psraw $0x4,%%xmm0 \n" \ @@ -2691,17 +2705,43 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, "1: \n" READYUV422 YUVTORGB(yuvconstants) - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0,(%[dst_rgb24]) \n" - "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" - "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" + STORERGB24 + "subl $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] +#if defined(__i386__) + [width]"+m"(width) // %[width] +#else + [width]"+rm"(width) // %[width] +#endif + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] + [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), + [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" + ); +} + +void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" + "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" + "sub %[u_buf],%[v_buf] \n" + + LABELALIGN + "1: \n" + READYUV444 + YUVTORGB(yuvconstants) + STORERGB24 "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] diff --git a/source/row_neon.cc b/source/row_neon.cc index 3f5c5de1b..36185e8c9 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -156,6 +156,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, : "cc", "memory", YUVTORGB_REGS, "d6"); } +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct 
YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV444 YUVTORGB + RGBTORGB8 + "subs %[width], %[width], #8 \n" + "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n" + "bgt 1b \n" + : [src_y] "+r"(src_y), // %[src_y] + [src_u] "+r"(src_u), // %[src_u] + [src_v] "+r"(src_v), // %[src_v] + [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24] + [width] "+r"(width) // %[width] + : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] + [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] + : "cc", "memory", YUVTORGB_REGS); +} + void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 37962378e..3cbd9b79b 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -142,6 +142,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, : "cc", "memory", YUVTORGB_REGS, "v19"); } +void I444ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV444 YUVTORGB + RGBTORGB8 + "subs %w[width], %w[width], #8 \n" + "st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n" + "b.gt 1b \n" + : [src_y] "+r"(src_y), // %[src_y] + [src_u] "+r"(src_u), // %[src_u] + [src_v] "+r"(src_v), // %[src_v] + [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24] + [width] "+r"(width) // %[width] + : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] + [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] + : "cc", "memory", YUVTORGB_REGS); +} + void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, diff --git a/source/row_win.cc b/source/row_win.cc index c7c1ff60d..c5a14f86f 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2789,6 +2789,44 @@ __declspec(naked) void I422ToRGB24Row_SSSE3( } } +// 8 pixels. 
+// 8 UV values, mixed with 8 Y producing 8 RGB24 (24 bytes). +__declspec(naked) void I444ToRGB24Row_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + __asm { + push esi + push edi + push ebx + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U + mov edi, [esp + 12 + 12] // V + mov edx, [esp + 12 + 16] // argb + mov ebx, [esp + 12 + 20] // yuvconstants + mov ecx, [esp + 12 + 24] // width + sub edi, esi + movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 + movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 + + convertloop: + READYUV444 + YUVTORGB(ebx) + STORERGB24 + + sub ecx, 8 + jg convertloop + + pop ebx + pop edi + pop esi + ret + } +} + // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). __declspec(naked) void I422ToRGB565Row_SSSE3( diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index bb06e4edd..47eff2ece 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -683,6 +683,9 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32) #define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ kFilterBilinear) +#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \ + I422ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ + kFilterBilinear) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) @@ -795,8 +798,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) +TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1) +TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1) TESTPLANARTOB(J444, 1, 
1, ARGB, 4, 4, 1) TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) @@ -820,6 +827,7 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) +TESTPLANARTOB(I422, 2, 1, RGB24Filter, 3, 3, 1) #else TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) @@ -844,6 +852,7 @@ TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif