Add more 10 bit YUV to RGB conversion functions

The following functions are added:
planar YUV:
 I410ToAR30, I410ToARGB
planar YUVA:
 I010AlphaToARGB, I210AlphaToARGB, I410AlphaToARGB
biplanar YUV:
 P010ToARGB, P210ToARGB
 P010ToAR30, P210ToAR30

biplanar functions can also handle 12 bit and 16 bit samples.

libyuv_unittest --gtest_filter=LibYUVConvertTest.*10*ToA*:LibYUVConvertTest.*P?1?ToA*

R=fbarchard@chromium.org

Bug: libyuv:751, libyuv:844
Change-Id: I2be02244dfa23335e1e7bc241fb0613990208de5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2707003
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Yuan Tong 2021-03-03 11:22:32 +08:00 committed by Frank Barchard
parent c41eabe3d4
commit cdabad5bfa
9 changed files with 2700 additions and 254 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1779
Version: 1780
License: BSD
License File: LICENSE

View File

@ -54,12 +54,30 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
// ABGR output is produced by the ARGB converters with the U plane/stride
// arguments (c, d) exchanged with the V plane/stride arguments (e, f), and a
// "VU" suffix token-pasted onto the constants name (i) to select a
// U/V-swapped matrix.
#define I010ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
// AB30 output: the same U/V swap applied to the AR30 converters.
#define I010ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
// Alpha-aware ABGR: U/V pairs (c, d) and (e, f) are swapped; the alpha plane
// arguments (g, h) and destination stay in place; the constants name is
// argument k here and gets the VU suffix.
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I422AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I444AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I010AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I210AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I410AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
// Alias.
#define ARGBToARGB ARGBCopy
@ -125,32 +143,6 @@ int J420ToABGR(const uint8_t* src_y,
int width,
int height);
// Convert F420 to ARGB. BT.709 full range
LIBYUV_API
int F420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert F420 to ABGR. BT.709 full range
LIBYUV_API
int F420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8_t* src_y,
@ -1440,7 +1432,7 @@ int I444ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
// multiply 10 bit yuv into high bits to allow any number of bits.
// Convert 10 bit 420 YUV to AR30 with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
@ -1454,7 +1446,7 @@ int I010ToAR30Matrix(const uint16_t* src_y,
int width,
int height);
// multiply 10 bit yuv into high bits to allow any number of bits.
// Convert 10 bit 422 YUV to AR30 with matrix.
LIBYUV_API
int I210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
@ -1468,6 +1460,20 @@ int I210ToAR30Matrix(const uint16_t* src_y,
int width,
int height);
// Convert 10 bit 444 YUV to AR30 with matrix.
LIBYUV_API
int I410ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit YUV to ARGB with matrix.
LIBYUV_API
int I010ToARGBMatrix(const uint16_t* src_y,
@ -1496,6 +1502,87 @@ int I210ToARGBMatrix(const uint16_t* src_y,
int width,
int height);
// Convert 10 bit 444 YUV to ARGB with matrix.
LIBYUV_API
int I410ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to ARGB with matrix.
LIBYUV_API
int P010ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to ARGB with matrix.
LIBYUV_API
int P210ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to AR30 with matrix.
LIBYUV_API
int P010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to AR30 with matrix.
LIBYUV_API
int P210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// P012 and P010 use most significant bits so the conversion is the same.
// Convert P012 to ARGB with matrix.
#define P012ToARGBMatrix P010ToARGBMatrix
// Convert P012 to AR30 with matrix.
#define P012ToAR30Matrix P010ToAR30Matrix
// Convert P212 to ARGB with matrix.
#define P212ToARGBMatrix P210ToARGBMatrix
// Convert P212 to AR30 with matrix.
#define P212ToAR30Matrix P210ToAR30Matrix
// Convert P016 to ARGB with matrix.
#define P016ToARGBMatrix P010ToARGBMatrix
// Convert P016 to AR30 with matrix.
#define P016ToAR30Matrix P010ToAR30Matrix
// Convert P216 to ARGB with matrix.
#define P216ToARGBMatrix P210ToARGBMatrix
// Convert P216 to AR30 with matrix.
#define P216ToAR30Matrix P210ToAR30Matrix
// Convert I420 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I420AlphaToARGBMatrix(const uint8_t* src_y,
@ -1547,6 +1634,57 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
int height,
int attenuate);
// Convert I010 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I010AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I210 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I210AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I410 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I410AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert NV12 to ARGB with matrix.
LIBYUV_API
int NV12ToARGBMatrix(const uint8_t* src_y,

View File

@ -175,8 +175,10 @@ extern "C" {
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I444ALPHATOARGBROW_SSSE3
#define HAS_I210ALPHATOARGBROW_SSSE3
#define HAS_I410ALPHATOARGBROW_SSSE3
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I444ALPHATOARGBROW_SSSE3
#endif
#endif
@ -240,8 +242,10 @@ extern "C" {
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I444ALPHATOARGBROW_AVX2
#define HAS_I210ALPHATOARGBROW_AVX2
#define HAS_I410ALPHATOARGBROW_AVX2
#define HAS_I422ALPHATOARGBROW_AVX2
#define HAS_I444ALPHATOARGBROW_AVX2
#endif
#endif
@ -280,9 +284,15 @@ extern "C" {
#define HAS_I210TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_I410TOAR30ROW_SSSE3
#define HAS_I410TOARGBROW_SSSE3
#define HAS_MERGEARGBROW_SSE2
#define HAS_MERGERGBROW_SSSE3
#define HAS_MIRRORUVROW_SSSE3
#define HAS_P210TOAR30ROW_SSSE3
#define HAS_P210TOARGBROW_SSSE3
#define HAS_P410TOAR30ROW_SSSE3
#define HAS_P410TOARGBROW_SSSE3
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
#define HAS_RGBATOYJROW_SSSE3
@ -311,6 +321,12 @@ extern "C" {
#define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2
#define HAS_I400TOARGBROW_AVX2
#define HAS_I410TOAR30ROW_AVX2
#define HAS_I410TOARGBROW_AVX2
#define HAS_P210TOAR30ROW_AVX2
#define HAS_P210TOARGBROW_AVX2
#define HAS_P410TOAR30ROW_AVX2
#define HAS_P410TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
@ -2575,6 +2591,33 @@ void I210ToARGBRow_C(const uint16_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -2626,6 +2669,27 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -2705,6 +2769,32 @@ void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I422ToAR30Row_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2723,6 +2813,32 @@ void I210ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2821,6 +2937,48 @@ void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2923,6 +3081,32 @@ void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2941,6 +3125,32 @@ void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -3039,6 +3249,46 @@ void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -3537,6 +3787,46 @@ void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1779
#define LIBYUV_VERSION 1780
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -1045,6 +1045,58 @@ int U210ToAB30(const uint16_t* src_y,
&kYuv2020Constants, width, height);
}
// Convert 10 bit 444 YUV (I410) to AR30 with the supplied color matrix.
// Returns 0 on success, -1 on invalid arguments. Negative height inverts.
LIBYUV_API
int I410ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_u,
                     int src_stride_u,
                     const uint16_t* src_v,
                     int src_stride_v,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Per-row converter: starts as the portable C path and is promoted to a
  // SIMD variant below when the CPU supports it.
  void (*I410ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
                        const uint16_t* v_buf, uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I410ToAR30Row_C;
  if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar30 += (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_I410TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I410ToAR30Row = IS_ALIGNED(width, 8) ? I410ToAR30Row_SSSE3
                                         : I410ToAR30Row_Any_SSSE3;
  }
#endif
#if defined(HAS_I410TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I410ToAR30Row = IS_ALIGNED(width, 16) ? I410ToAR30Row_AVX2
                                          : I410ToAR30Row_Any_AVX2;
  }
#endif
  // 4:4:4 sampling: every plane advances one stride per output row.
  for (row = 0; row < height; ++row) {
    I410ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_ar30 += dst_stride_ar30;
  }
  return 0;
}
// Convert 10 bit YUV to ARGB with matrix.
LIBYUV_API
int I010ToARGBMatrix(const uint16_t* src_y,
@ -1087,14 +1139,6 @@ int I010ToARGBMatrix(const uint16_t* src_y,
I210ToARGBRow = I210ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I210TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I210ToARGBRow = I210ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I210ToARGBRow = I210ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@ -1258,14 +1302,6 @@ int I210ToARGBMatrix(const uint16_t* src_y,
I210ToARGBRow = I210ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I210TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I210ToARGBRow = I210ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I210ToARGBRow = I210ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@ -1385,6 +1421,254 @@ int U210ToABGR(const uint16_t* src_y,
width, height);
}
LIBYUV_API
int I410ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
void (*I410ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_I410TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I410ToARGBRow = I410ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I410ToARGBRow = I410ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I410TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I410ToARGBRow = I410ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I410ToARGBRow = I410ToARGBRow_AVX2;
}
}
#endif
for (y = 0; y < height; ++y) {
I410ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}
// Convert P010 (10 bit biplanar 4:2:0, samples in the most significant bits)
// to ARGB with the supplied color matrix. The same code is correct for
// P012/P016 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P010ToARGBMatrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Row worker shared with the 4:2:2 path: a single P010 row has the same
  // layout as a P210 row; the 4:2:0 vertical subsampling is handled in the
  // loop below.
  void (*P210ToARGBRow)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_P210TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 8) ? P210ToARGBRow_SSSE3 : P210ToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 16) ? P210ToARGBRow_AVX2 : P210ToARGBRow_Any_AVX2;
  }
#endif
  for (row = 0; row < height; ++row) {
    P210ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // 4:2:0: one UV row serves two luma rows.
    if (row & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Convert P210 (10 bit biplanar 4:2:2, samples in the most significant bits)
// to ARGB with the supplied color matrix. The same code is correct for
// P212/P216 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P210ToARGBMatrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Per-row converter: starts as the portable C path and is promoted to a
  // SIMD variant below when the CPU supports it.
  void (*P210ToARGBRow)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_P210TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 8) ? P210ToARGBRow_SSSE3 : P210ToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 16) ? P210ToARGBRow_AVX2 : P210ToARGBRow_Any_AVX2;
  }
#endif
  // 4:2:2: the UV plane has one row per luma row.
  for (row = 0; row < height; ++row) {
    P210ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_uv += src_stride_uv;
  }
  return 0;
}
// Convert P010 (10 bit biplanar 4:2:0, samples in the most significant bits)
// to AR30 with the supplied color matrix. The same code is correct for
// P012/P016 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P010ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Row worker shared with the 4:2:2 path: a single P010 row has the same
  // layout as a P210 row; the 4:2:0 vertical subsampling is handled in the
  // loop below.
  void (*P210ToAR30Row)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
  if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar30 += (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_P210TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 8) ? P210ToAR30Row_SSSE3 : P210ToAR30Row_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 16) ? P210ToAR30Row_AVX2 : P210ToAR30Row_Any_AVX2;
  }
#endif
  for (row = 0; row < height; ++row) {
    P210ToAR30Row(src_y, src_uv, dst_ar30, yuvconstants, width);
    dst_ar30 += dst_stride_ar30;
    src_y += src_stride_y;
    // 4:2:0: one UV row serves two luma rows.
    if (row & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Convert P210 (10 bit biplanar 4:2:2, samples in the most significant bits)
// to AR30 with the supplied color matrix. The same code is correct for
// P212/P216 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P210ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Per-row converter: starts as the portable C path and is promoted to a
  // SIMD variant below when the CPU supports it.
  void (*P210ToAR30Row)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
  if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar30 += (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_P210TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 8) ? P210ToAR30Row_SSSE3 : P210ToAR30Row_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 16) ? P210ToAR30Row_AVX2 : P210ToAR30Row_Any_AVX2;
  }
#endif
  // 4:2:2: the UV plane has one row per luma row.
  for (row = 0; row < height; ++row) {
    P210ToAR30Row(src_y, src_uv, dst_ar30, yuvconstants, width);
    dst_ar30 += dst_stride_ar30;
    src_y += src_stride_y;
    src_uv += src_stride_uv;
  }
  return 0;
}
// Convert I420 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I420AlphaToARGBMatrix(const uint8_t* src_y,
@ -1903,6 +2187,323 @@ int I444AlphaToABGR(const uint8_t* src_y,
width, height, attenuate);
}
// Convert I010 (10 bit 4:2:0 planar YUV) with Alpha to preattenuated ARGB
// with matrix.
// The alpha plane is read as uint16_t samples (presumably the same 10 bit
// depth as the YUV planes — confirm against the row functions). If
// |attenuate| is non zero, the RGB channels are multiplied by alpha after
// conversion. Negative height inverts the image.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I010AlphaToARGBMatrix(const uint16_t* src_y,
                          int src_stride_y,
                          const uint16_t* src_u,
                          int src_stride_u,
                          const uint16_t* src_v,
                          int src_stride_v,
                          const uint16_t* src_a,
                          int src_stride_a,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          const struct YuvConstants* yuvconstants,
                          int width,
                          int height,
                          int attenuate) {
  int y;
  // Row converter: a single I010 row has the same layout as an I210 row, so
  // the I210 row functions are reused; 4:2:0 vertical subsampling is handled
  // in the loop below.
  void (*I210AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, const uint16_t* a_buf,
                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width) = I210AlphaToARGBRow_C;
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  // Fix: also reject a NULL alpha plane — src_a is dereferenced every row.
  if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I210ALPHATOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I210ALPHATOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                       width);
    if (attenuate) {
      // Premultiply RGB by alpha in place.
      ARGBAttenuateRow(dst_argb, dst_argb, width);
    }
    dst_argb += dst_stride_argb;
    src_a += src_stride_a;
    src_y += src_stride_y;
    // 4:2:0: advance chroma every other luma row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I210 (10 bit 4:2:2 planar YUV) with Alpha to preattenuated ARGB
// with matrix.
// The alpha plane is read as uint16_t samples (presumably the same 10 bit
// depth as the YUV planes — confirm against the row functions). If
// |attenuate| is non zero, the RGB channels are multiplied by alpha after
// conversion. Negative height inverts the image.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I210AlphaToARGBMatrix(const uint16_t* src_y,
                          int src_stride_y,
                          const uint16_t* src_u,
                          int src_stride_u,
                          const uint16_t* src_v,
                          int src_stride_v,
                          const uint16_t* src_a,
                          int src_stride_a,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          const struct YuvConstants* yuvconstants,
                          int width,
                          int height,
                          int attenuate) {
  int y;
  void (*I210AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, const uint16_t* a_buf,
                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width) = I210AlphaToARGBRow_C;
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  // Fix: also reject a NULL alpha plane — src_a is dereferenced every row.
  if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I210ALPHATOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I210ALPHATOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                       width);
    if (attenuate) {
      // Premultiply RGB by alpha in place.
      ARGBAttenuateRow(dst_argb, dst_argb, width);
    }
    dst_argb += dst_stride_argb;
    src_a += src_stride_a;
    src_y += src_stride_y;
    // 4:2:2: chroma advances every luma row.
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I410 with Alpha to preattenuated ARGB with matrix.
// I410 is 10 bit 4:4:4 planar YUV stored in uint16_t samples; the alpha
// plane is full width.  Strides of the 16 bit planes are in uint16_t units
// (pointer arithmetic below is on uint16_t*).  If |attenuate| is non-zero
// each output row is premultiplied by its alpha.
// Returns 0 on success, -1 on invalid argument.
LIBYUV_API
int I410AlphaToARGBMatrix(const uint16_t* src_y,
                          int src_stride_y,
                          const uint16_t* src_u,
                          int src_stride_u,
                          const uint16_t* src_v,
                          int src_stride_v,
                          const uint16_t* src_a,
                          int src_stride_a,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          const struct YuvConstants* yuvconstants,
                          int width,
                          int height,
                          int attenuate) {
  int y;
  void (*I410AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, const uint16_t* a_buf,
                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width) = I410AlphaToARGBRow_C;
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  // src_a is dereferenced every row below, so it must be checked along with
  // the other planes.
  if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I410ALPHATOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I410AlphaToARGBRow = I410AlphaToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I410AlphaToARGBRow = I410AlphaToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I410ALPHATOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I410AlphaToARGBRow = I410AlphaToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I410AlphaToARGBRow = I410AlphaToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I410AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                       width);
    if (attenuate) {
      ARGBAttenuateRow(dst_argb, dst_argb, width);
    }
    dst_argb += dst_stride_argb;
    src_a += src_stride_a;
    src_y += src_stride_y;
    // 4:4:4: chroma planes are full size, so advance them every row.
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I400 to ARGB with matrix.
LIBYUV_API
int I400ToARGBMatrix(const uint8_t* src_y,

View File

@ -115,6 +115,46 @@ ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C
// Any 4 planes to 1 plane of 8 bit with yuvconstants
// Generates an any-width wrapper around a SIMD row function that only
// handles widths that are a multiple of (MASK + 1): the aligned part of
// the row is processed directly, the remainder through zeroed temp
// buffers so the SIMD function never reads past the source.
//   NAMEANY   - name of the generated wrapper.
//   ANY_SIMD  - the width-restricted SIMD row function.
//   UVSHIFT   - chroma subsampling shift (1 = half-width U/V, 0 = full).
//   DUVSHIFT  - destination subsampling shift (0 = one output per pixel).
//   T         - sample type of the source planes (e.g. uint16_t).
//   SBPP      - bytes per source sample; BPP - bytes per dest pixel.
//   MASK      - width alignment mask of ANY_SIMD.
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,      \
               int width) {                                                    \
    SIMD_ALIGNED(T temp[16 * 4]);                                              \
    SIMD_ALIGNED(uint8_t out[64]);                                             \
    memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */                    \
    int r = width & MASK;                                                      \
    int n = width & ~MASK;                                                     \
    if (n > 0) {                                                               \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);          \
    }                                                                          \
    memcpy(temp, y_buf + n, r * SBPP);                                         \
    memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);          \
    memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);          \
    memcpy(temp + 48, a_buf + n, r * SBPP);                                    \
    ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants,         \
             MASK + 1);                                                        \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP);       \
  }

// 10 bit 4:2:2 (I210) and 4:4:4 (I410) YUV with alpha to ARGB wrappers.
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210ALPHATOARGBROW_AVX2
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410ALPHATOARGBROW_AVX2
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY41CT
// Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
@ -327,6 +367,18 @@ ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_SSSE3
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_SSSE3
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_AVX2
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_AVX2
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
@ -546,12 +598,57 @@ ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 2 planes of 16 bit to 1 with yuvconstants
// Generates an any-width wrapper for biplanar (Y + interleaved UV) row
// functions whose SIMD implementation requires widths that are a multiple
// of (MASK + 1).  The remainder pixels are processed through zeroed temp
// buffers.  Parameters are as in ANY41CT; note the UV plane holds sample
// PAIRS, hence the * 2 in the copy below.
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr,              \
               const struct YuvConstants* yuvconstants, int width) {           \
    SIMD_ALIGNED(T temp[16 * 3]);                                              \
    SIMD_ALIGNED(uint8_t out[64]);                                             \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */                    \
    int r = width & MASK;                                                      \
    int n = width & ~MASK;                                                     \
    if (n > 0) {                                                               \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                       \
    }                                                                          \
    memcpy(temp, y_buf + n, r * SBPP);                                         \
    memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
    ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1);                    \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP);       \
  }

// P210/P410 (and P010-style deeper samples) to ARGB / AR30 wrappers.
#ifdef HAS_P210TOAR30ROW_SSSE3
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_SSSE3
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_AVX2
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P210TOAR30ROW_AVX2
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_SSSE3
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_SSSE3
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_AVX2
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_AVX2
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY21CT
// Any 2 16 bit planes with parameter to 1
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
int width) { \
SIMD_ALIGNED(T temp[16 * 4]); \
memset(temp, 0, 16 * 4); /* for msan */ \
memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \

View File

@ -1546,14 +1546,14 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#undef MAKEYUVCONSTANTS
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
// Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel8_8(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1634,13 +1634,13 @@ static __inline void YuvPixel8_16(uint8_t y,
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel16(int16_t y,
int16_t u,
int16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
static __inline void YuvPixel10_16(uint16_t y,
uint16_t u,
uint16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1680,24 +1680,116 @@ static __inline void YuvPixel16(int16_t y,
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
uint16_t u,
uint16_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
// Reads 10 bit YUV, converts through 16 bit fixed point and clamps the
// result down to 8 bit RGB.
static __inline void YuvPixel10_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  int b16;
  int g16;
  int r16;
  YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
  // Shift off the 6 fractional bits of the fixed point result and clamp
  // to [0, 255].
  *b = Clamp(b16 >> 6);
  *g = Clamp(g16 >> 6);
  *r = Clamp(r16 >> 6);
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel16_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  // The conversion constants are laid out differently per architecture to
  // match each SIMD implementation; pick them up accordingly.
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Scale 16 bit luma by the Y gain, keeping the integer part.
  uint32_t y1 = (uint32_t)(y * yg) >> 16;
  // Reduce 16 bit chroma to 8 bit before applying the 8 bit coefficients.
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  // >> 6 removes the fixed point fraction; Clamp limits to [0, 255].
  *b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6);
  *g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6);
  *r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6);
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 16 bit fixed point (caller shifts
// and clamps, e.g. StoreAR30).
static __inline void YuvPixel16_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  // The conversion constants are laid out differently per architecture to
  // match each SIMD implementation; pick them up accordingly.
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Scale 16 bit luma by the Y gain, keeping the integer part.
  uint32_t y1 = (uint32_t)(y * yg) >> 16;
  // Reduce 16 bit chroma to 8 bit before applying the 8 bit coefficients.
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  // Results stay in fixed point; no shift or clamp here.
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
// Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YPixel(uint8_t y,
uint8_t* b,
uint8_t* g,
@ -1730,11 +1822,11 @@ void I444ToARGBRow_C(const uint8_t* src_y,
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
@ -1742,8 +1834,8 @@ void I444ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -1756,8 +1848,8 @@ void I444ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
@ -1776,11 +1868,11 @@ void I422ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@ -1788,8 +1880,8 @@ void I422ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -1803,11 +1895,11 @@ void I210ToARGBRow_C(const uint16_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@ -1815,15 +1907,81 @@ void I210ToARGBRow_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
// 10 bit 4:4:4 planar YUV to ARGB: one chroma sample per luma sample,
// alpha forced fully opaque.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* dst = rgb_buf + i * 4;  // 4 bytes (BGRA) per pixel.
    YuvPixel10_8(src_y[i], src_u[i], src_v[i], dst + 0, dst + 1, dst + 2,
                 yuvconstants);
    dst[3] = 255;  // opaque alpha
  }
}
// 10 bit 4:2:2 planar YUV with alpha to ARGB: each U/V sample covers two
// luma samples.  Alpha is 10 bit; >> 2 scales it to 8 bit.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining = width;
  // Process luma in pairs sharing one chroma sample.
  while (remaining >= 2) {
    YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
    YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
                 rgb_buf + 6, yuvconstants);
    rgb_buf[7] = clamp255(src_a[1] >> 2);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Odd trailing pixel.
  if (width & 1) {
    YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
  }
}
// 10 bit 4:4:4 planar YUV with alpha to ARGB.  Alpha is 10 bit; >> 2
// scales it to 8 bit.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* dst = rgb_buf + i * 4;  // 4 bytes (BGRA) per pixel.
    YuvPixel10_8(src_y[i], src_u[i], src_v[i], dst + 0, dst + 1, dst + 2,
                 yuvconstants);
    dst[3] = clamp255(src_a[i] >> 2);
  }
}
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
uint32_t ar30;
b = b >> 4; // convert 10.6 to 10 bit.
b = b >> 4; // convert 8 bit 10.6 to 10 bit.
g = g >> 4;
r = r >> 4;
b = Clamp10(b);
@ -1845,9 +2003,9 @@ void I210ToAR30Row_C(const uint16_t* src_y,
int g;
int r;
for (x = 0; x < width - 1; x += 2) {
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf + 4, b, g, r);
src_y += 2;
src_u += 1;
@ -1855,11 +2013,113 @@ void I210ToAR30Row_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
}
}
// 10 bit 4:4:4 planar YUV to AR30 (2:10:10:10): one fixed point
// conversion per pixel, packed by StoreAR30.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b16;
    int g16;
    int r16;
    YuvPixel10_16(src_y[i], src_u[i], src_v[i], &b16, &g16, &r16,
                  yuvconstants);
    StoreAR30(rgb_buf + i * 4, b16, g16, r16);
  }
}
// Biplanar 4:2:2 16 bit (P210; also handles 10/12 bit samples stored in
// the high bits) to ARGB.  UV is interleaved; each pair covers two luma
// samples.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // opaque alpha
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
                 rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;  // opaque alpha
    src_y += 2;
    src_uv += 2;  // one interleaved U/V pair consumed per 2 pixels.
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Odd trailing pixel.
  if (width & 1) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
// Biplanar 4:4:4 16 bit (P410) to ARGB: one interleaved U/V pair per
// pixel, alpha forced fully opaque.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* dst = rgb_buf + i * 4;  // 4 bytes (BGRA) per pixel.
    YuvPixel16_8(src_y[i], src_uv[i * 2], src_uv[i * 2 + 1], dst + 0, dst + 1,
                 dst + 2, yuvconstants);
    dst[3] = 255;  // opaque alpha
  }
}
// Biplanar 4:2:2 16 bit (P210) to AR30 (2:10:10:10).  UV is interleaved;
// each pair covers two luma samples.  Fixed point results are packed by
// StoreAR30.
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_uv += 2;  // one interleaved U/V pair consumed per 2 pixels.
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Odd trailing pixel.
  if (width & 1) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
// Biplanar 4:4:4 16 bit (P410) to AR30 (2:10:10:10): one interleaved U/V
// pair per pixel, packed by StoreAR30.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b16;
    int g16;
    int r16;
    YuvPixel16_16(src_y[i], src_uv[i * 2], src_uv[i * 2 + 1], &b16, &g16,
                  &r16, yuvconstants);
    StoreAR30(rgb_buf + i * 4, b16, g16, r16);
  }
}
// 8 bit YUV to 10 bit AR30
// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
void I422ToAR30Row_C(const uint8_t* src_y,
@ -1903,11 +2163,11 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = src_a[0];
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = src_a[1];
src_y += 2;
src_u += 2;
@ -1916,8 +2176,8 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
}
}
@ -1931,8 +2191,8 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
src_y += 1;
src_u += 1;
@ -1952,11 +2212,11 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = src_a[1];
src_y += 2;
src_u += 1;
@ -1965,8 +2225,8 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
}
}
@ -1979,18 +2239,18 @@ void I422ToRGB24Row_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
@ -2008,8 +2268,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@ -2024,7 +2284,7 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
dst_argb4444 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@ -2046,8 +2306,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@ -2062,7 +2322,7 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
dst_argb1555 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@ -2084,8 +2344,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2100,7 +2360,7 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2115,19 +2375,19 @@ void NV12ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_uv += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2139,19 +2399,19 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_vu += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2163,17 +2423,17 @@ void NV12ToRGB24Row_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_uv += 2;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
@ -2184,17 +2444,17 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_vu += 2;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
@ -2211,8 +2471,8 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2226,7 +2486,7 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2240,18 +2500,18 @@ void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_yuy2 += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2262,18 +2522,18 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_uyvy += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2286,11 +2546,11 @@ void I422ToRGBARow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
rgb_buf + 7, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
rgb_buf + 7, yuvconstants);
rgb_buf[4] = 255;
src_y += 2;
src_u += 1;
@ -2298,8 +2558,8 @@ void I422ToRGBARow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
}
}

View File

@ -1944,13 +1944,63 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x8(%[u_buf]),%[u_buf] \n" \
"punpcklwd %%xmm1,%%xmm0 \n" \
"psraw $0x2,%%xmm0 \n" \
"psraw $2,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $0x6,%%xmm4 \n" \
"psllw $6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 210 (10 bit 4:2:2), upsample to 8 UV. With 8 Alpha.
// NOTE: callers rebase v_buf to the v-u byte offset ("sub %[u_buf],%[v_buf]")
// so both planes are addressed off u_buf.  psraw $2 narrows 10 bit UV/A to
// 8 bit; psllw $6 scales 10 bit Y up to 16 bit.
#define READYUVA210 \
"movq (%[u_buf]),%%xmm0 \n" \
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x8(%[u_buf]),%[u_buf] \n" \
"punpcklwd %%xmm1,%%xmm0 \n" \
"psraw $2,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
"packuswb %%xmm5,%%xmm5 \n" \
"lea 0x10(%[a_buf]),%[a_buf] \n"
// Read 8 UV from 444 10 bit
#define READYUV410 \
"movdqu (%[u_buf]),%%xmm0 \n" \
"movdqu 0x00(%[u_buf],%[v_buf],1),%%xmm2 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"psraw $2,%%xmm0 \n" \
"psraw $2,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm1 \n" \
"punpcklwd %%xmm2,%%xmm0 \n" \
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 444 10 bit. With 8 Alpha.
#define READYUVA410 \
"movdqu (%[u_buf]),%%xmm0 \n" \
"movdqu 0x00(%[u_buf],%[v_buf],1),%%xmm2 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"psraw $2,%%xmm0 \n" \
"psraw $2,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm1 \n" \
"punpcklwd %%xmm2,%%xmm0 \n" \
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $0x6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
"packuswb %%xmm5,%%xmm5 \n" \
"lea 0x10(%[a_buf]),%[a_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
#define READYUVA422 \
"movd (%[u_buf]),%%xmm0 \n" \
@ -2010,6 +2060,27 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
"pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
"lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n"
// Read 4 UV from P210, upsample to 8 UV
// P210 stores samples MSB-aligned in 16 bit words, so only the top byte
// is kept (psrlw $8).  This also works for 12 and 16 bit MSB-aligned input.
#define READP210 \
"movdqu (%[uv_buf]),%%xmm0 \n" \
"lea 0x10(%[uv_buf]),%[uv_buf] \n" \
"psrlw $0x8,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from P410
// 4:4:4 layout: no upsampling, just narrow both UV registers to bytes.
#define READP410 \
"movdqu (%[uv_buf]),%%xmm0 \n" \
"movdqu 0x10(%[uv_buf]),%%xmm1 \n" \
"lea 0x20(%[uv_buf]),%[uv_buf] \n" \
"psrlw $0x8,%%xmm0 \n" \
"psrlw $0x8,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
#if defined(__x86_64__)
#define YUVTORGB_SETUP(yuvconstants) \
"movdqa (%[yuvconstants]),%%xmm8 \n" \
@ -2362,6 +2433,146 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
);
}
// 10 bit YUV to ARGB
// I410: 10 bit 4:4:4 planar YUV.  Converts 8 pixels per loop iteration
// ("sub $0x8").  xmm5 is set to all ones, presumably the opaque alpha
// consumed by STOREARGB — confirm against the STOREARGB macro.
void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
"pcmpeqb %%xmm5,%%xmm5 \n" // 0xff alpha
LABELALIGN
"1: \n"
READYUV410
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
// 10 bit YUVA to ARGB
// I210 with alpha plane: 4:2:2 10 bit Y/U/V/A.  8 pixels per iteration.
// Alpha is loaded into xmm5 by READYUVA210.
void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA210
YUVTORGB(yuvconstants)
STOREARGB
// 32-bit subl: on i386 width is constrained "+m" (see below) because
// too few registers remain for a "+rm" operand.
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf),
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I210ALPHATOARGBROW_SSSE3
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
// 10 bit YUVA to ARGB
// I410 with alpha plane: 4:4:4 10 bit Y/U/V/A.  8 pixels per iteration.
// Alpha is loaded into xmm5 by READYUVA410.
void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA410
YUVTORGB(yuvconstants)
STOREARGB
// 32-bit subl: width may be a memory operand on i386 (see constraints).
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf),
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I410ALPHATOARGBROW_SSSE3
// 10 bit YUV to AR30
// I410 (4:4:4 10 bit) to 2:10:10:10 AR30.  xmm5/xmm6/xmm7 hold the alpha
// bits and the 0..1023 clamp range used by STOREAR30; all three are listed
// in the clobbers.
void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN
"1: \n"
READYUV410
YUVTORGB16(yuvconstants)
STOREAR30
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
@ -2513,6 +2724,123 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
// clang-format on
}
// P210 biplanar YUV to ARGB.
// UV is interleaved in one plane; samples are MSB-aligned in 16 bit words
// (READP210 keeps only the top byte), so 12 and 16 bit input also works.
// 8 pixels per loop iteration; xmm5 = all ones for opaque alpha.
void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READP210
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[uv_buf] "+r"(uv_buf), // %[uv_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+rm"(width) // %[width]
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
// P410 biplanar 4:4:4 YUV to ARGB.
// One UV pair per pixel, MSB-aligned 16 bit samples (12/16 bit also works).
// 8 pixels per loop iteration; xmm5 = all ones for opaque alpha.
void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READP410
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[uv_buf] "+r"(uv_buf), // %[uv_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+rm"(width) // %[width]
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
"xmm5");
}
// P210 biplanar YUV to 2:10:10:10 AR30.
// xmm5/xmm6/xmm7 hold the 2 alpha bits and the 0..1023 clamp range
// used by STOREAR30.  8 pixels per loop iteration.
void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN
"1: \n"
READP210
YUVTORGB16(yuvconstants)
STOREAR30
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
// P410 biplanar 4:4:4 YUV to 2:10:10:10 AR30.
// Same constant setup as P210ToAR30Row_SSSE3; 8 pixels per iteration.
void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN
"1: \n"
READP410
YUVTORGB16(yuvconstants)
STOREAR30
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2570,7 +2898,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 210 10 bit, upsample to 16 UV
// Read 8 UV from 210, upsample to 16 UV
// TODO(fbarchard): Consider vshufb to replace pack/unpack
// TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1.
#define READYUV210_AVX2 \
@ -2580,13 +2908,64 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
"vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
"vpsraw $0x2,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $0x6,%%ymm4,%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 210, upsample to 16 UV. With 16 Alpha.
// vpsraw $2 narrows 10 bit UV/A to 8 bit; vpsllw $6 scales Y to 16 bit.
#define READYUVA210_AVX2 \
"vmovdqu (%[u_buf]),%%xmm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
"vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
"vpackuswb %%ymm5,%%ymm5,%%ymm5 \n" \
"lea 0x20(%[a_buf]),%[a_buf] \n"
// Read 16 UV from 410
// 4:4:4: both halves are narrowed and interleaved; no duplication needed.
#define READYUV410_AVX2 \
"vmovdqu (%[u_buf]),%%ymm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%ymm2 \n" \
"lea 0x20(%[u_buf]),%[u_buf] \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm2,%%ymm2 \n" \
"vpunpckhwd %%ymm2,%%ymm0,%%ymm1 \n" \
"vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from 410. With 16 Alpha.
#define READYUVA410_AVX2 \
"vmovdqu (%[u_buf]),%%ymm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%ymm2 \n" \
"lea 0x20(%[u_buf]),%[u_buf] \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm2,%%ymm2 \n" \
"vpunpckhwd %%ymm2,%%ymm0,%%ymm1 \n" \
"vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
"vpackuswb %%ymm5,%%ymm5,%%ymm5 \n" \
"lea 0x20(%[a_buf]),%[a_buf] \n"
// Read 16 UV from 444. With 16 Alpha.
#define READYUVA444_AVX2 \
"vmovdqu (%[u_buf]),%%xmm0 \n" \
@ -2641,6 +3020,28 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from P210, upsample to 16 UV
// (AVX2 loads 32 bytes = 8 UV pairs; the SSE comment count does not apply.)
// P210 samples are MSB-aligned: vpsrlw $8 keeps the top byte, so 12 and
// 16 bit MSB-aligned input also works.
#define READP210_AVX2 \
"vmovdqu (%[uv_buf]),%%ymm0 \n" \
"lea 0x20(%[uv_buf]),%[uv_buf] \n" \
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from P410
// (64 bytes = 16 UV pairs; 4:4:4, no upsampling.)
#define READP410_AVX2 \
"vmovdqu (%[uv_buf]),%%ymm0 \n" \
"vmovdqu 0x20(%[uv_buf]),%%ymm1 \n" \
"lea 0x40(%[uv_buf]),%[uv_buf] \n" \
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
"vpsrlw $0x8,%%ymm1,%%ymm1 \n" \
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
#define READYUY2_AVX2 \
"vmovdqu (%[yuy2_buf]),%%ymm4 \n" \
@ -2934,6 +3335,165 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
}
#endif // HAS_I210TOAR30ROW_AVX2
#if defined(HAS_I410TOARGBROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
// ymm5 = all ones, presumably the opaque alpha used by STOREARGB_AVX2.
void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV410_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I410TOARGBROW_AVX2
#if defined(HAS_I210ALPHATOARGBROW_AVX2)
// 16 pixels
// 8 UV, 16 Y and 16 A producing 16 ARGB (64 bytes).
void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA210_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
// 32-bit subl: width may be a memory operand on i386 (see constraints).
"subl $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf), // %[a_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5"
);
}
#endif // HAS_I210ALPHATOARGBROW_AVX2
#if defined(HAS_I410ALPHATOARGBROW_AVX2)
// 16 pixels
// 16 UV, 16 Y and 16 A producing 16 ARGB (64 bytes).
void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA410_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
// 32-bit subl: width may be a memory operand on i386 (see constraints).
"subl $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf), // %[a_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5"
);
}
#endif // HAS_I410ALPHATOARGBROW_AVX2
#if defined(HAS_I410TOAR30ROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y producing 16 AR30 (64 bytes).
// ymm5 holds the 2 alpha bits; ymm6/ymm7 hold the 0..1023 clamp range
// used by STOREAR30_AVX2.
void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READYUV410_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// xmm6/xmm7 added to the clobbers: the asm writes ymm6 (vpxor) and
// ymm7 (vpcmpeqb/vpsrlw) above, matching the SSSE3 and P210/P410
// AR30 variants.  Omitting a written register from the clobber list
// is undefined behavior in GCC extended asm.
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_I410TOAR30ROW_AVX2
#if defined(HAS_I444ALPHATOARGBROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y and 16 A producing 16 ARGB.
@ -3193,6 +3753,148 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
}
#endif // HAS_UYVYTOARGBROW_AVX2
#if defined(HAS_P210TOARGBROW_AVX2)
// 16 pixels.
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
// P210 samples are MSB-aligned, so 12/16 bit input also works.
void OMITFP P210ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // opaque alpha
LABELALIGN
"1: \n"
READP210_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Fixed: clobber list previously contained "xmm0" twice.
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
#endif // HAS_P210TOARGBROW_AVX2
#if defined(HAS_P410TOARGBROW_AVX2)
// 16 pixels.
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
// (P410 is 4:4:4 — READP410_AVX2 does no upsampling; the previous comment
// was copied from the P210 variant.)
void OMITFP P410ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // opaque alpha
LABELALIGN
"1: \n"
READP410_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Fixed: clobber list previously contained "xmm0" twice.
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
#endif // HAS_P410TOARGBROW_AVX2
#if defined(HAS_P210TOAR30ROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, with 16 Y producing 16 AR30 (64 bytes).
// (READP210_AVX2 loads 8 UV pairs and duplicates them.)
void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READP210_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_P210TOAR30ROW_AVX2
#if defined(HAS_P410TOAR30ROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y producing 16 AR30 (64 bytes).
// Same AR30 constant setup as P210ToAR30Row_AVX2 (alpha bits, clamp range).
void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READP410_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_P410TOAR30ROW_AVX2
#ifdef HAS_I400TOARGBROW_SSE2
void I400ToARGBRow_SSE2(const uint8_t* y_buf,
uint8_t* dst_argb,

View File

@ -55,13 +55,13 @@ namespace libyuv {
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \
"SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \
"SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \
"DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \
"DST_SUBSAMP_Y unsupported"); \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
@ -928,6 +928,8 @@ TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
@ -938,6 +940,8 @@ TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
@ -948,6 +952,8 @@ TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
@ -3162,89 +3168,441 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
#endif // HAS_ABGRTOAR30ROW_AVX2
// TODO(fbarchard): Fix clamping issue affected by U channel.
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN) \
YALIGN, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_ - 4, _Any, +, 0, 0) \
YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Unaligned, +, 1, 1) \
YALIGN, benchmark_width_, _Unaligned, +, 1, 1, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Invert, -, 0, 0) \
YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0, 0)
YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, ABGR, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1)
// Convenience wrappers binding 10 bit 4:4:4 planar (x410) conversions to a
// fixed colorspace matrix: I = BT.601, H = BT.709, U = BT.2020.
// ARGB/ABGR produce 8 bit output; AR30/AB30 produce 10 bit 2:10:10:10 output.
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
// 10 bit planar YUV to 8 bit ARGB/ABGR tests. Arguments are: format,
// horizontal/vertical chroma subsampling, destination format, destination
// bytes-per-pixel, alignment, row alignment, and source bit depth (10),
// which bounds the random sample values generated by the test macro.
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, ABGR, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
// Removed: stale AR30/AB30 TESTPLANAR16TOB invocations missing the S_DEPTH
// argument required by the updated macro; the depth-10 equivalents are
// instantiated immediately below, so these were both a macro-arity error and
// duplicate TEST_F definitions.
// 10 bit planar YUV to 10 bit AR30/AB30 tests. Guarded by
// LITTLE_ENDIAN_ONLY_TEST because AR30 is a packed little-endian 2:10:10:10
// layout that the byte-wise comparison assumes.
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, AB30, 4, 4, 1, 10)
#endif
#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \
align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_a + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_v + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \
dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \
ATTEN); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \
dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(src_a); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
// Expands TESTQPLANAR16TOBI into the standard five test variants:
// _Any (odd width), _Unaligned (offset buffers), _Invert (negative height),
// _Opt (aligned fast path), and _Premult (alpha attenuation enabled).
#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
                         ALIGN, YALIGN, S_DEPTH)                            \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH)   \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Unaligned, +, 1, 0, S_DEPTH) \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)       \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH)
// Colorspace-bound wrappers for 10 bit 4:2:0 planar YUV-with-alpha (x010):
// I = BT.601, J = JPEG (full range 601), F = full-range BT.709,
// H = BT.709, U = BT.2020, V = full-range BT.2020.
#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
// Colorspace-bound wrappers for 10 bit 4:2:2 planar YUV-with-alpha (x210);
// same matrix naming scheme as the x010 wrappers above (I/J/F/H/U/V).
#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
// Colorspace-bound wrappers for 10 bit 4:4:4 planar YUV-with-alpha (x410);
// same matrix naming scheme as the x010 wrappers above (I/J/F/H/U/V).
#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
// 10 bit planar YUV-with-alpha tests across 4:2:0 (x010), 4:2:2 (x210) and
// 4:4:4 (x410) subsampling and all six colorspace variants; the trailing 10
// is the source bit depth used for random sample generation.
TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] = \
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_uv + SOFF), \
kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_uv + SOFF), \
kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth, \
NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
// Expands TESTBIPLANAR16TOBI into the standard four test variants:
// _Any (odd width), _Unaligned (offset src and dst), _Invert (negative
// height), and _Opt (aligned fast path).
#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
                          ALIGN, YALIGN, S_DEPTH)                            \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH)   \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_, _Unaligned, +, 1, 1, S_DEPTH) \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
// Wrappers binding biplanar (P010/P210 10 bit, P012/P212 12 bit,
// P016/P216 16 bit; x0xx = 4:2:0, x2xx = 4:2:2) conversions to the BT.709
// matrix, for both 8 bit ARGB and 10 bit AR30 destinations.
#define P010ToARGB(a, b, c, d, e, f, g, h) \
  P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P210ToARGB(a, b, c, d, e, f, g, h) \
  P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P010ToAR30(a, b, c, d, e, f, g, h) \
  P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P210ToAR30(a, b, c, d, e, f, g, h) \
  P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P012ToARGB(a, b, c, d, e, f, g, h) \
  P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P212ToARGB(a, b, c, d, e, f, g, h) \
  P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P012ToAR30(a, b, c, d, e, f, g, h) \
  P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P212ToAR30(a, b, c, d, e, f, g, h) \
  P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P016ToARGB(a, b, c, d, e, f, g, h) \
  P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P216ToARGB(a, b, c, d, e, f, g, h) \
  P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P016ToAR30(a, b, c, d, e, f, g, h) \
  P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P216ToAR30(a, b, c, d, e, f, g, h) \
  P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
// Biplanar 10/12/16 bit to ARGB tests; the trailing argument is the source
// bit depth, which sets the MSB-aligned mask used for random samples.
TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
// AR30 output is a packed little-endian layout, so these byte-compared
// tests are restricted to little-endian builds.
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
#endif
static int Clamp(int y) {