diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 66c4a649d..502184e38 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -108,8 +108,6 @@ extern "C" {
 #define HAS_I422TOARGB1555ROW_SSSE3
 #define HAS_I422TOARGB4444ROW_SSSE3
 #define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOARGBMATRIXROW_SSSE3
-#define HAS_I422TOABGRMATRIXROW_SSSE3
 #define HAS_I422TOBGRAROW_SSSE3
 #define HAS_I422TORAWROW_SSSE3
 #define HAS_I422TORGB24ROW_SSSE3
@@ -151,8 +149,6 @@ extern "C" {
 #define HAS_YUY2TOYROW_SSE2
 #define HAS_I444TOARGBROW_SSSE3
 #define HAS_I444TOABGRROW_SSSE3
-#define HAS_I444TOARGBMATRIXROW_SSSE3
-#define HAS_I444TOABGRMATRIXROW_SSSE3

 // Effects:
 #define HAS_ARGBADDROW_SSE2
@@ -191,8 +187,6 @@ extern "C" {
     (!defined(__clang__) || defined(__SSSE3__))
 #define HAS_I422TOARGBROW_SSSE3
 #define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGBMATRIXROW_SSSE3
-#define HAS_I422TOABGRMATRIXROW_SSSE3
 #endif

 // The following are available for AVX2 Visual C and clangcl 32 bit:
@@ -218,8 +212,6 @@ extern "C" {
 #define HAS_NV21TOARGBROW_AVX2
 #define HAS_NV21TORGB565ROW_AVX2
 #define HAS_RGB565TOARGBROW_AVX2
-#define HAS_I444TOARGBMATRIXROW_AVX2
-#define HAS_I444TOABGRMATRIXROW_AVX2
 #endif

 // The following are available on all x86 platforms, but
@@ -239,8 +231,6 @@ extern "C" {
 #define HAS_I400TOARGBROW_AVX2
 #define HAS_I422TOABGRROW_AVX2
 #define HAS_I422TOARGBROW_AVX2
-#define HAS_I422TOARGBMATRIXROW_AVX2
-#define HAS_I422TOABGRMATRIXROW_AVX2
 #define HAS_I422TOBGRAROW_AVX2
 #define HAS_I422TORAWROW_AVX2
 #define HAS_I422TORGB24ROW_AVX2
@@ -313,8 +303,6 @@ extern "C" {
 #define HAS_I422TOARGB4444ROW_NEON
 // TODO(fbarchard): Implement aarch64 neon version
 #ifndef __aarch64__
-#define HAS_I422TOARGBMATRIXROW_NEON
-#define HAS_I422TOABGRMATRIXROW_NEON
 #define HAS_J422TOARGBROW_NEON
 #define HAS_J422TOABGRROW_NEON
 #define HAS_H422TOARGBROW_NEON
@@ -444,8 +432,19 @@ typedef uint32 ulvec32[8];
 typedef uint8 ulvec8[32];
 #endif

+#if defined(__arm__) || defined(__aarch64__)
+
+// This struct is for Arm color conversion.
+struct YuvConstants {
+  uvec8 kUVToRB;
+  uvec8 kUVToG;
+  vec16 kUVBiasBGR;
+  vec32 kYToRgb;
+};
+
+#else
+
 // This struct is for Intel color conversion.
-// TODO(fnbarchard): Consider different struct for other platforms.
struct YuvConstants { lvec8 kUVToB; lvec8 kUVToG; @@ -464,21 +463,12 @@ struct YuvConstants { #define KUVBIASG 128 #define KUVBIASR 160 #define KYTORGB 192 - -struct YuvConstantsNEON { - uvec8 kUVToRB; - uvec8 kUVToG; - vec16 kUVBiasBGR; - vec32 kYToRgb; -}; +#endif extern struct YuvConstants kYuvConstants; extern struct YuvConstants kYvuConstants; extern struct YuvConstants kYuvJConstants; extern struct YuvConstants kYuvHConstants; -extern struct YuvConstantsNEON kYuvConstantsNEON; -extern struct YuvConstantsNEON kYuvJConstantsNEON; -extern struct YuvConstantsNEON kYuvHConstantsNEON; #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP @@ -569,110 +559,97 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstantsNEON* YuvConstants, - int width); -void I422ToABGRMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstantsNEON* YuvConstants, - int width); void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width); -void J422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void H422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); void NV12ToARGBRow_NEON(const uint8* src_y, 
const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, - int width); -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_NEON(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_NEON(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); @@ -1054,571 +1031,526 @@ void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToABGRMatrixRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); void I411ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV21ToRGB565Row_C(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_C(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct 
YuvConstants* yuvconstants, int width); void I422ToRGB24Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToABGRMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToBGRARow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGBARow_AVX2(const uint8* src_y, +void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToBGRARow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I444ToARGBMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I444ToARGBMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); void I444ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I444ToABGRMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width); -void I444ToABGRMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width); void I444ToABGRRow_SSSE3(const uint8* src_y, const 
uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I444ToABGRRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width); +void I444ToABGRRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width); +void I444ToABGRRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToABGRMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); void I411ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_SSSE3(const 
uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToABGRRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); 
void I444ToABGRRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + 
struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); @@ -1716,128 +1648,121 @@ void I444ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void J422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void 
H422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void H422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); void NV12ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); diff --git a/source/convert_argb.cc b/source/convert_argb.cc index dec44dedd..dc3071926 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -56,6 +56,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || @@ -103,7 +104,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I444ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I444ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -124,6 +125,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I444ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || @@ -171,7 +173,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I444ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; @@ -192,6 +194,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if 
(!src_y || !src_u || !src_v || !dst_argb || @@ -248,7 +251,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -269,6 +272,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I411ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || @@ -316,7 +320,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I411ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -338,6 +342,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = ARGBCopyYToAlphaRow_C; @@ -436,7 +441,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); ARGBCopyYToAlphaRow(src_a, dst_argb, width); ARGBAttenuateRow(dst_argb, dst_argb, width); dst_argb += dst_stride_argb; @@ -462,6 +467,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToABGRRow_C; void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = ARGBCopyYToAlphaRow_C; @@ -560,7 +566,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); ARGBCopyYToAlphaRow(src_a, dst_abgr, width); ARGBAttenuateRow(dst_abgr, dst_abgr, width); dst_abgr += dst_stride_abgr; @@ -639,7 +645,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) = + void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) = J400ToARGBRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { @@ -766,7 +772,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = RGB24ToARGBRow_C; if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { @@ -816,7 +822,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = RAWToARGBRow_C; if (!src_raw || !dst_argb || width <= 0 || height == 0) { @@ -866,7 +872,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RGB565ToARGBRow)(const uint8* src_rgb565, 
uint8* dst_argb, int pix) = + void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) = RGB565ToARGBRow_C; if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { @@ -925,7 +931,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, int width, int height) { int y; void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, - int pix) = ARGB1555ToARGBRow_C; + int width) = ARGB1555ToARGBRow_C; if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -983,7 +989,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, int width, int height) { int y; void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, - int pix) = ARGB4444ToARGBRow_C; + int width) = ARGB4444ToARGBRow_C; if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1044,6 +1050,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { @@ -1081,7 +1088,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - NV12ToARGBRow(src_y, src_uv, dst_argb, width); + NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1098,10 +1105,11 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*NV21ToARGBRow)(const uint8* y_buf, + void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, - int width) = NV21ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = NV12ToARGBRow_C; if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1112,33 +1120,33 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_NV21TOARGBROW_SSSE3) +#if defined(HAS_NV12TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3; + NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_SSSE3; + NV12ToARGBRow = NV12ToARGBRow_SSSE3; } } #endif -#if defined(HAS_NV21TOARGBROW_AVX2) +#if defined(HAS_NV12TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - NV21ToARGBRow = NV21ToARGBRow_Any_AVX2; + NV12ToARGBRow = NV12ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - NV21ToARGBRow = NV21ToARGBRow_AVX2; + NV12ToARGBRow = NV12ToARGBRow_AVX2; } } #endif -#if defined(HAS_NV21TOARGBROW_NEON) +#if defined(HAS_NV12TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - NV21ToARGBRow = NV21ToARGBRow_Any_NEON; + NV12ToARGBRow = NV12ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_NEON; + NV12ToARGBRow = NV12ToARGBRow_NEON; } } #endif for (y = 0; y < height; ++y) { - NV21ToARGBRow(src_y, src_uv, dst_argb, width); + NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1157,6 +1165,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420, void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_m420 || !dst_argb || width <= 0 || height == 0) { @@ -1194,14 +1203,16 @@ int M420ToARGB(const uint8* src_m420, int 
src_stride_m420, #endif for (y = 0; y < height - 1; y += 2) { - NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); + NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, + &kYuvConstants, width); NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2, - dst_argb + dst_stride_argb, width); + dst_argb + dst_stride_argb, &kYuvConstants, width); dst_argb += dst_stride_argb * 2; src_m420 += src_stride_m420 * 3; } if (height & 1) { - NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); + NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, + &kYuvConstants, width); } return 0; } @@ -1212,7 +1223,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) = + void (*YUY2ToARGBRow)(const uint8* src_yuy2, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) = YUY2ToARGBRow_C; if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) { @@ -1256,7 +1270,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, } #endif for (y = 0; y < height; ++y) { - YUY2ToARGBRow(src_yuy2, dst_argb, width); + YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvConstants, width); src_yuy2 += src_stride_yuy2; dst_argb += dst_stride_argb; } @@ -1269,7 +1283,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) = + void (*UYVYToARGBRow)(const uint8* src_uyvy, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) = UYVYToARGBRow_C; if (!src_uyvy || !dst_argb || width <= 0 || height == 0) { @@ -1313,7 +1330,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, } #endif for (y = 0; y < height; ++y) { - UYVYToARGBRow(src_uyvy, dst_argb, width); + UYVYToARGBRow(src_uyvy, dst_argb, &kYuvConstants, width); src_uyvy += src_stride_uyvy; dst_argb += dst_stride_argb; } @@ -1328,11 +1345,12 @@ int J420ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1343,42 +1361,42 @@ int J420ToARGB(const uint8* src_y, int src_stride_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_J422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToARGBRow = J422ToARGBRow_Any_SSSE3; + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_J422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToARGBRow = J422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToARGBRow = J422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_J422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToARGBRow = J422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if 
(IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_J422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1397,11 +1415,12 @@ int J422ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -1422,42 +1441,42 @@ int J422ToARGB(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; } -#if defined(HAS_J422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToARGBRow = J422ToARGBRow_Any_SSSE3; + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_J422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToARGBRow = J422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToARGBRow = J422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_J422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToARGBRow = J422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_J422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -1474,11 +1493,12 @@ int J420ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*J422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { return -1; @@ -1489,42 
+1509,42 @@ int J420ToABGR(const uint8* src_y, int src_stride_y, dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; dst_stride_abgr = -dst_stride_abgr; } -#if defined(HAS_J422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToABGRRow = J422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_J422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToABGRRow = J422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToABGRRow = J422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_J422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToABGRRow = J422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_J422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; if (y & 1) { @@ -1543,11 +1563,12 @@ int J422ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*J422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { @@ -1568,42 +1589,42 @@ int J422ToABGR(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; } -#if defined(HAS_J422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToABGRRow = J422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_J422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToABGRRow = J422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToABGRRow = J422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_J422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToABGRRow = J422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_J422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && 
IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; @@ -1620,11 +1641,12 @@ int H420ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*H422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1635,42 +1657,42 @@ int H420ToARGB(const uint8* src_y, int src_stride_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_H422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToARGBRow = H422ToARGBRow_Any_SSSE3; + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_H422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToARGBRow = H422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToARGBRow = H422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_H422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToARGBRow = H422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_H422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1689,11 +1711,12 @@ int H422ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*H422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -1714,42 +1737,42 @@ int H422ToARGB(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; } -#if defined(HAS_H422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToARGBRow = H422ToARGBRow_Any_SSSE3; + I422ToARGBRow = 
I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_H422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToARGBRow = H422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToARGBRow = H422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_H422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToARGBRow = H422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_H422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -1766,11 +1789,12 @@ int H420ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*H422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { return -1; @@ -1781,42 +1805,42 @@ int H420ToABGR(const uint8* src_y, int src_stride_y, dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; dst_stride_abgr = -dst_stride_abgr; } -#if defined(HAS_H422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToABGRRow = H422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_H422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToABGRRow = H422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToABGRRow = H422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_H422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToABGRRow = H422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_H422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToABGRRow(src_y, 
src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; if (y & 1) { @@ -1835,11 +1859,12 @@ int H422ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*H422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { @@ -1860,42 +1885,42 @@ int H422ToABGR(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; } -#if defined(HAS_H422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToABGRRow = H422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_H422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToABGRRow = H422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToABGRRow = H422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_H422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToABGRRow = H422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_H422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; diff --git a/source/convert_from.cc b/source/convert_from.cc index 31f1ac992..b9c7be904 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -462,6 +462,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -508,7 +509,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -531,6 +532,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToBGRARow_C; if (!src_y || !src_u || !src_v || !dst_bgra || width <= 0 || height == 0) { @@ -577,7 +579,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, 
src_u, src_v, dst_bgra, width); + I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width); dst_bgra += dst_stride_bgra; src_y += src_stride_y; if (y & 1) { @@ -600,6 +602,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { @@ -637,7 +640,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; if (y & 1) { @@ -660,6 +663,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { @@ -697,7 +701,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); + I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; if (y & 1) { @@ -720,6 +724,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { @@ -757,7 +762,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width); + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvConstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; if (y & 1) { @@ -780,6 +785,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRAWRow_C; if (!src_y || !src_u || !src_v || !dst_raw || width <= 0 || height == 0) { @@ -817,7 +823,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRAWRow(src_y, src_u, src_v, dst_raw, width); + I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvConstants, width); dst_raw += dst_stride_raw; src_y += src_stride_y; if (y & 1) { @@ -840,6 +846,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGB1555Row_C; if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 || height == 0) { @@ -877,7 +884,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width); + I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvConstants, width); dst_argb1555 += dst_stride_argb1555; src_y += src_stride_y; if (y & 1) { @@ -901,6 +908,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGB4444Row_C; if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 || height == 0) { @@ -938,7 +946,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width); + 
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvConstants, width); dst_argb4444 += dst_stride_argb4444; src_y += src_stride_y; if (y & 1) { @@ -961,6 +969,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { @@ -998,7 +1007,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width); + I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvConstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1029,6 +1038,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C; @@ -1105,7 +1115,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, // Allocate a row of argb. align_buffer_64(row_argb, width * 4); for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, row_argb, width); + I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvConstants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, *(uint32*)(dither4x4 + ((y & 3) << 2)), width); dst_rgb565 += dst_stride_rgb565; diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 3cca5f48b..2299ab892 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -287,9 +287,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, int width, int height) { int y; void (*YUY2ToUV422Row)(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) = + uint8* dst_u, uint8* dst_v, int width) = YUY2ToUV422Row_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = + void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) = YUY2ToYRow_C; // Negative height means invert the image. if (height < 0) { @@ -359,10 +359,10 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, int width, int height) { int y; void (*UYVYToUV422Row)(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) = + uint8* dst_u, uint8* dst_v, int width) = UYVYToUV422Row_C; void (*UYVYToYRow)(const uint8* src_uyvy, - uint8* dst_y, int pix) = UYVYToYRow_C; + uint8* dst_y, int width) = UYVYToYRow_C; // Negative height means invert the image. 
if (height < 0) { height = -height; @@ -790,6 +790,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToBGRARow_C; if (!src_y || !src_u || !src_v || !dst_bgra || @@ -846,7 +847,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); + I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width); dst_bgra += dst_stride_bgra; src_y += src_stride_y; src_u += src_stride_u; @@ -867,6 +868,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || @@ -914,7 +916,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; @@ -935,6 +937,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || @@ -982,7 +985,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); + I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; src_u += src_stride_u; @@ -1001,6 +1004,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, void (*NV12ToRGB565Row)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { @@ -1038,7 +1042,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width); + NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvConstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1055,10 +1059,11 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; - void (*NV21ToRGB565Row)(const uint8* y_buf, + void (*NV12ToRGB565Row)(const uint8* y_buf, const uint8* src_vu, uint8* rgb_buf, - int width) = NV21ToRGB565Row_C; + struct YuvConstants* yuvconstants, + int width) = NV12ToRGB565Row_C; if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1069,33 +1074,33 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } -#if defined(HAS_NV21TORGB565ROW_SSSE3) +#if defined(HAS_NV12TORGB565ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3; + NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - NV21ToRGB565Row = NV21ToRGB565Row_SSSE3; + NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; } } #endif -#if defined(HAS_NV21TORGB565ROW_AVX2) +#if defined(HAS_NV12TORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2; + NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { - NV21ToRGB565Row = 
NV21ToRGB565Row_AVX2; + NV12ToRGB565Row = NV12ToRGB565Row_AVX2; } } #endif -#if defined(HAS_NV21TORGB565ROW_NEON) +#if defined(HAS_NV12TORGB565ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON; + NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; if (IS_ALIGNED(width, 8)) { - NV21ToRGB565Row = NV21ToRGB565Row_NEON; + NV12ToRGB565Row = NV12ToRGB565Row_NEON; } } #endif for (y = 0; y < height; ++y) { - NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width); + NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1110,7 +1115,7 @@ void SetPlane(uint8* dst_y, int dst_stride_y, int width, int height, uint32 value) { int y; - void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C; + void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C; if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -1186,7 +1191,7 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, int width, int height, uint32 value) { int y; - void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C; + void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { @@ -1909,7 +1914,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, const uint8* shuffler, int width, int height) { int y; void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, - const uint8* shuffler, int pix) = ARGBShuffleRow_C; + const uint8* shuffler, int width) = ARGBShuffleRow_C; if (!src_bgra || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1976,7 +1981,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, const uint8* src_sobely, uint8* dst, int width)) { int y; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) = + void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) = ARGBToYJRow_C; void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width) = SobelYRow_C; @@ -2360,8 +2365,8 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = - SplitUVRow_C; + void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width) = SplitUVRow_C; void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -2464,8 +2469,8 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = - SplitUVRow_C; + void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width) = SplitUVRow_C; void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; diff --git a/source/row_any.cc b/source/row_any.cc index c309499ca..46cbdc759 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -40,103 +40,9 @@ extern "C" { memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ SS(r, DUVSHIFT) * BPP); \ } - -#ifdef HAS_I422TOARGBROW_SSSE3 -ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) -ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) -ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7) 
-ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7) -ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7) -ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7) -#endif -#ifdef HAS_I444TOARGBROW_SSSE3 -ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) -ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) -ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) -ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) -ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) -ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) -ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) -ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) -ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) +#ifdef HAS_I422TOYUY2ROW_SSE2 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) -#endif // HAS_I444TOARGBROW_SSSE3 -#ifdef HAS_I444TOABGRROW_SSSE3 -ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7) -#endif -#ifdef HAS_I422TORGB24ROW_AVX2 -ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) -#endif -#ifdef HAS_I422TORAWROW_AVX2 -ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) -#endif -#ifdef HAS_J422TOARGBROW_AVX2 -ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_J422TOABGRROW_AVX2 -ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_H422TOARGBROW_AVX2 -ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_H422TOABGRROW_AVX2 -ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TOARGBROW_AVX2 -ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TOBGRAROW_AVX2 -ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TORGBAROW_AVX2 -ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TOABGRROW_AVX2 -ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I444TOARGBROW_AVX2 -ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) -#endif -#ifdef HAS_I444TOABGRROW_AVX2 -ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15) -#endif -#ifdef HAS_I411TOARGBROW_AVX2 -ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) -#endif -#ifdef HAS_I422TOARGB4444ROW_AVX2 -ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) -#endif -#ifdef HAS_I422TOARGB1555ROW_AVX2 -ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) -#endif -#ifdef HAS_I422TORGB565ROW_AVX2 -ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) -#endif -#ifdef HAS_I422TOARGBROW_NEON -ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) -ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) -ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) -ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) -ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) -ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) -ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) -ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) -ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) -ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) -ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 
7) -#endif -#ifdef HAS_J422TOARGBROW_NEON -ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7) -#endif -#ifdef HAS_J422TOABGRROW_NEON -ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7) -#endif -#ifdef HAS_H422TOARGBROW_NEON -ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7) -#endif -#ifdef HAS_H422TOABGRROW_NEON -ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7) #endif #ifdef HAS_I422TOYUY2ROW_NEON ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) @@ -144,7 +50,97 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #ifdef HAS_I422TOUYVYROW_NEON ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #endif -#undef ANY31 +#undef ANY31C + +// Any 3 planes to 1 with yuvconstants +#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ + uint8* dst_ptr, struct YuvConstants* yuvconstants, \ + int width) { \ + SIMD_ALIGNED(uint8 temp[64 * 4]); \ + memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \ + yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ + SS(r, DUVSHIFT) * BPP); \ + } + +#ifdef HAS_I422TOARGBROW_SSSE3 +ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) +ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) +#endif +#ifdef HAS_I444TOARGBROW_SSSE3 +ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) +ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) +ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) +ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) +ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) +ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) +ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) +ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) +ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) +#endif // HAS_I444TOARGBROW_SSSE3 +#ifdef HAS_I444TOABGRROW_SSSE3 +ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7) +#endif +#ifdef HAS_I422TORGB24ROW_AVX2 +ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) +#endif +#ifdef HAS_I422TORAWROW_AVX2 +ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) +#endif +#ifdef HAS_I422TOARGBROW_AVX2 +ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TOBGRAROW_AVX2 +ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TORGBAROW_AVX2 +ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TOABGRROW_AVX2 +ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I444TOARGBROW_AVX2 +ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) +#endif +#ifdef HAS_I444TOABGRROW_AVX2 +ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15) +#endif +#ifdef HAS_I411TOARGBROW_AVX2 +ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) +#endif +#ifdef HAS_I422TOARGB4444ROW_AVX2 +ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) +#endif 
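For reference, the ANY31C wrapper defined just above is what removes the width restriction from the SIMD kernels instantiated here: n = width & ~MASK pixels go straight to the kernel, and the r = width & MASK leftovers are bounced through a zeroed temp block so the kernel still sees a full MASK + 1 pixels without reading or writing past the caller's planes. A standalone sketch of that split follows; the width value is illustrative and not taken from the patch.

/* Standalone sketch, not part of the patch: the width split performed by
 * the ANY31C wrapper. MASK is 7 for the 8-pixel SSSE3/NEON kernels and 15
 * for the 16-pixel AVX2 kernels instantiated in this section. */
#include <stdio.h>

int main(void) {
  int width = 100;        /* illustrative width */
  int mask = 15;          /* an AVX2 kernel handles 16 pixels per call */
  int n = width & ~mask;  /* 96 pixels are fed to the SIMD kernel directly */
  int r = width & mask;   /* 4 leftover pixels */
  printf("direct SIMD pixels: %d, remainder pixels: %d\n", n, r);
  /* For the remainder, ANY31C copies the r luma samples and SS(r, UVSHIFT)
   * chroma samples into the zeroed temp buffer, runs the kernel once more on
   * a full mask + 1 block, and copies only the r output pixels back. */
  return 0;
}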
+#ifdef HAS_I422TOARGB1555ROW_AVX2 +ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) +#endif +#ifdef HAS_I422TORGB565ROW_AVX2 +ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) +#endif +#ifdef HAS_I422TOARGBROW_NEON +ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) +ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) +ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) +ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) +ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) +ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) +ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) +ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) +ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) +ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) +ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) +#endif +#undef ANY31C // Any 2 planes to 1. #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ @@ -164,32 +160,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } -// Biplanar to RGB. -#ifdef HAS_NV12TOARGBROW_SSSE3 -ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) -ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV12TOARGBROW_AVX2 -ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) -ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) -#endif -#ifdef HAS_NV12TOARGBROW_NEON -ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) -ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV12TORGB565ROW_SSSE3 -ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) -ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) -#endif -#ifdef HAS_NV12TORGB565ROW_AVX2 -ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) -ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15) -#endif -#ifdef HAS_NV12TORGB565ROW_NEON -ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) -ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7) -#endif - // Merge functions. #ifdef HAS_MERGEUVROW_SSE2 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) @@ -249,6 +219,46 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) #endif #undef ANY21 +// Any 2 planes to 1 with yuvconstants +#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ + void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ + uint8* dst_ptr, struct YuvConstants* yuvconstants, \ + int width) { \ + SIMD_ALIGNED(uint8 temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n * SBPP, r * SBPP); \ + memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ + SS(r, UVSHIFT) * SBPP2); \ + ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } + +// Biplanar to RGB. 
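A note on the biplanar wrappers that follow (illustration only, not patch content): NV12 carries a single interleaved UV plane, which is what the SBPP2 = 2 argument of the ANY21C instantiations accounts for, and the NV21 variants disappear because the NV21 entry points, such as NV21ToRGB565 above, now reuse the NV12 kernels with kYvuConstants, whose swapped coefficients read the same byte pair as (V, U). A minimal sketch of the chroma addressing involved; the helper name is hypothetical.

/* Hypothetical helper, for illustration only: how a row kernel locates the
 * interleaved chroma pair for pixel column x of an NV12/NV21 plane. */
void SampleBiplanarUV(const unsigned char* uv_buf, int x,
                      unsigned char* first, unsigned char* second) {
  const unsigned char* pair = uv_buf + (x >> 1) * 2;  /* 2 bytes per 2 columns */
  *first = pair[0];   /* U for NV12, V for NV21 */
  *second = pair[1];  /* V for NV12, U for NV21 */
  /* With kYuvConstants the kernel treats (first, second) as (U, V); the NV21
   * paths pass kYvuConstants instead, so the identical kernel applies the V
   * coefficients to pair[0] and the U coefficients to pair[1]. */
}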
+#ifdef HAS_NV12TOARGBROW_SSSE3 +ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TOARGBROW_AVX2 +ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) +#endif +#ifdef HAS_NV12TOARGBROW_NEON +ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TORGB565ROW_SSSE3 +ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) +#endif +#ifdef HAS_NV12TORGB565ROW_AVX2 +ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) +#endif +#ifdef HAS_NV12TORGB565ROW_NEON +ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) +#endif +#undef ANY21C + // Any 1 to 1. #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ @@ -297,9 +307,7 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) #if defined(HAS_I400TOARGBROW_AVX2) ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) #endif -#if defined(HAS_YUY2TOARGBROW_SSSE3) -ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) -ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) +#if defined(HAS_RGB24TOARGBROW_SSSE3) ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) @@ -315,10 +323,6 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) #if defined(HAS_ARGB4444TOARGBROW_AVX2) ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) #endif -#if defined(HAS_YUY2TOARGBROW_AVX2) -ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) -ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) -#endif #if defined(HAS_ARGBTORGB24ROW_NEON) ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) @@ -327,8 +331,6 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) -ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) -ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) #endif #ifdef HAS_ARGBTOYROW_AVX2 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) @@ -426,6 +428,35 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif #undef ANY11 +// Any 1 to 1 with yuvconstants +#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ + struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8 temp[128 * 2]); \ + memset(temp, 0, 128); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } +#if defined(HAS_YUY2TOARGBROW_SSSE3) +ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) +ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) +#endif +#if defined(HAS_YUY2TOARGBROW_AVX2) +ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) +ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) +#endif +#if 
defined(HAS_YUY2TOARGBROW_NEON) +ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) +ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) +#endif +#undef ANY11C + // Any 1 to 1 blended. #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ diff --git a/source/row_common.cc b/source/row_common.cc index 0a4520f71..341380332 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1014,6 +1014,22 @@ void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) +#if defined(__arm__) || defined(__aarch64__) +YuvConstants SIMD_ALIGNED(kYuvConstants) = { + { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, + { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BB, BG, BR, 0, 0, 0, 0, 0 }, + { 0x0101 * YG, 0, 0, 0 } +}; + +YuvConstants SIMD_ALIGNED(kYvuConstants) = { + { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 }, + { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BB, BG, BR, 0, 0, 0, 0, 0 }, + { 0x0101 * YG, 0, 0, 0 } +}; + +#else // BT601 constants for YUV to RGB. YuvConstants SIMD_ALIGNED(kYuvConstants) = { { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, @@ -1041,22 +1057,7 @@ YuvConstants SIMD_ALIGNED(kYvuConstants) = { { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } }; - -YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = { - { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; - -// C reference code that mimics the YUV assembly. -static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; - *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6); - *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6); - *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6); -} +#endif // C reference code that mimics the YUV assembly. static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { @@ -1065,15 +1066,50 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { *g = Clamp((int32)(y1 + YGB) >> 6); *r = Clamp((int32)(y1 + YGB) >> 6); } -#undef YG + +#undef BB +#undef BG +#undef BR #undef YGB #undef UB #undef UG #undef VG #undef VR -#undef BB -#undef BG -#undef BR +#undef YG + +// C reference code that mimics the YUV assembly. 
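The C reference below now reads its coefficients out of the YuvConstants argument instead of file-scope defines; the arithmetic itself is unchanged. As a reading aid, here is a minimal standalone sketch of the same fixed-point pattern for one channel. The coefficient parameters mirror the values YuvPixel pulls from the struct; no numeric constants are assumed here.

/* Standalone sketch (not from the patch) of the fixed-point pattern used by
 * YuvPixel below, shown for the blue channel with VB treated as 0, which is
 * its usual value in the Intel tables. */
unsigned char YuvToBlueSketch(unsigned char y, unsigned char u,
                              int YG, int UB, int BB) {
  /* y * 0x0101 replicates the 8-bit luma into 16 bits, so the product with
   * the roughly 15-bit gain YG still fits in 32 bits; >>16 leaves the luma
   * term scaled by 64. */
  unsigned int y1 = (unsigned int)(y * 0x0101 * YG) >> 16;
  int blue = (-(u * UB) + (int)y1 + BB) >> 6;  /* remove the x64 scale */
  if (blue < 0) blue = 0;    /* Clamp() in the real code */
  if (blue > 255) blue = 255;
  return (unsigned char)blue;
}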
+static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, + uint8* b, uint8* g, uint8* r, + struct YuvConstants* yuvconstants) { +#if defined(__arm__) || defined(__aarch64__) + + int UB = -yuvconstants->kUVToRB[0]; + int VB = 0; + int UG = yuvconstants->kUVToG[0]; + int VG = yuvconstants->kUVToG[4]; + int UR = 0; + int VR = -yuvconstants->kUVToRB[4]; + int BB = yuvconstants->kUVBiasBGR[0]; + int BG = yuvconstants->kUVBiasBGR[1]; + int BR = yuvconstants->kUVBiasBGR[2]; + int YG = yuvconstants->kYToRgb[0]; +#else + int UB = yuvconstants->kUVToB[0]; + int VB = yuvconstants->kUVToB[1]; // usually 0 + int UG = yuvconstants->kUVToG[0]; + int VG = yuvconstants->kUVToG[1]; + int UR = yuvconstants->kUVToR[0]; // usually 0 + int VR = yuvconstants->kUVToR[1]; + int BB = yuvconstants->kUVBiasB[0]; + int BG = yuvconstants->kUVBiasG[0]; + int BR = yuvconstants->kUVBiasR[0]; + int YG = yuvconstants->kYToRgb[0]; +#endif + uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; + *b = Clamp((int32)(-(u * UB + v * VB) + y1 + BB) >> 6); + *g = Clamp((int32)(-(u * UG + v * VG) + y1 + BG) >> 6); + *r = Clamp((int32)(-(u * UR + v * VR) + y1 + BR) >> 6); +} // JPEG YUV to RGB reference // * R = Y - V * -1.40200 @@ -1095,6 +1131,15 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { #define BGJ (UGJ * 128 + VGJ * 128 + YGBJ) #define BRJ (VRJ * 128 + YGBJ) +#if defined(__arm__) || defined(__aarch64__) +// JPEG constants for YUV to RGB. +YuvConstants SIMD_ALIGNED(kYuvJConstants) = { + { -UBJ, -UBJ, -UBJ, -UBJ, -VRJ, -VRJ, -VRJ, -VRJ, 0, 0, 0, 0, 0, 0, 0, 0 }, + { UGJ, UGJ, UGJ, UGJ, VGJ, VGJ, VGJ, VGJ, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BBJ, BGJ, BRJ, 0, 0, 0, 0, 0 }, + { 0x0101 * YGJ, 0, 0, 0 } +}; +#else // JPEG constants for YUV to RGB. YuvConstants SIMD_ALIGNED(kYuvJConstants) = { { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, @@ -1114,23 +1159,7 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = { { YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ } }; - -// JPEG constants for YUV to RGB. -YuvConstantsNEON SIMD_ALIGNED(kYuvJConstantsNEON) = { - { -UBJ, -UBJ, -UBJ, -UBJ, -VRJ, -VRJ, -VRJ, -VRJ, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UGJ, UGJ, UGJ, UGJ, VGJ, VGJ, VGJ, VGJ, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BBJ, BGJ, BRJ, 0, 0, 0, 0, 0 }, - { 0x0101 * YGJ, 0, 0, 0 } -}; - -// C reference code that mimics the YUV assembly. -static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16; - *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6); - *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6); - *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6); -} +#endif #undef YGJ #undef YGBJ @@ -1162,6 +1191,15 @@ static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v, #define BGH (UGH * 128 + VGH * 128 + YGBH) #define BRH (VRH * 128 + YGBH) +#if defined(__arm__) || defined(__aarch64__) +// BT.709 constants for YUV to RGB. +YuvConstants SIMD_ALIGNED(kYuvHConstants) = { + { -UBH, -UBH, -UBH, -UBH, -VRH, -VRH, -VRH, -VRH, 0, 0, 0, 0, 0, 0, 0, 0 }, + { UGH, UGH, UGH, UGH, VGH, VGH, VGH, VGH, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BBH, BGH, BRH, 0, 0, 0, 0, 0 }, + { 0x0101 * YGH, 0, 0, 0 } +}; +#else // BT.709 constants for YUV to RGB. 
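One more orientation note, illustrative rather than part of the patch: with every colorspace expressed as a YuvConstants table, the row kernels themselves are colorspace-agnostic and the choice lives entirely in which table the high-level wrapper passes, for example &kYuvConstants for the I420/I422 paths, &kYuvJConstants for J422, &kYuvHConstants for H422, and &kYvuConstants where the chroma bytes arrive in VU order, as in NV21ToRGB565 above. A hypothetical helper making that mapping explicit:

/* Hypothetical helper, for illustration only; the real wrappers simply pass
 * the address of the table they need. The tables are declared in row.h. */
struct YuvConstants;
extern struct YuvConstants kYuvConstants;   /* BT.601 */
extern struct YuvConstants kYuvJConstants;  /* JPEG, full range */
extern struct YuvConstants kYuvHConstants;  /* BT.709 */
extern struct YuvConstants kYvuConstants;   /* BT.601 with U/V coefficients swapped */

enum SketchColorSpace { kSketchBT601, kSketchJPEG, kSketchBT709, kSketchVUOrder };

struct YuvConstants* PickYuvConstants(enum SketchColorSpace cs) {
  switch (cs) {
    case kSketchJPEG:    return &kYuvJConstants;
    case kSketchBT709:   return &kYuvHConstants;
    case kSketchVUOrder: return &kYvuConstants;  /* e.g. the NV21 path */
    default:             return &kYuvConstants;
  }
}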
YuvConstants SIMD_ALIGNED(kYuvHConstants) = { { UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, @@ -1181,23 +1219,7 @@ YuvConstants SIMD_ALIGNED(kYuvHConstants) = { { YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH } }; - -// BT.709 constants for YUV to RGB. -YuvConstantsNEON SIMD_ALIGNED(kYuvHConstantsNEON) = { - { -UBH, -UBH, -UBH, -UBH, -VRH, -VRH, -VRH, -VRH, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UGH, UGH, UGH, UGH, VGH, VGH, VGH, VGH, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BBH, BGH, BRH, 0, 0, 0, 0, 0 }, - { 0x0101 * YGH, 0, 0, 0 } -}; - -// C reference code that mimics the YUV assembly. -static __inline void YuvHPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YGH) >> 16; - *b = Clamp((int32)(-(u * UBH) + y1 + BBH) >> 6); - *g = Clamp((int32)(-(v * VGH + u * UGH) + y1 + BGH) >> 6); - *r = Clamp((int32)(-(v * VRH) + y1 + BRH) >> 6); -} +#endif #undef YGH #undef YGBH @@ -1217,14 +1239,17 @@ void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8 u = (src_u[0] + src_u[1] + 1) >> 1; uint8 v = (src_v[0] + src_v[1] + 1) >> 1; - YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, + yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, + yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 2; @@ -1233,7 +1258,7 @@ void I444ToARGBRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } @@ -1241,14 +1266,17 @@ void I444ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8 u = (src_u[0] + src_u[1] + 1) >> 1; uint8 v = (src_v[0] + src_v[1] + 1) >> 1; - YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, + yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); + YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, + yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 2; @@ -1257,7 +1285,7 @@ void I444ToABGRRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); } } #else @@ -1265,11 +1293,12 @@ void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width; ++x) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; src_y += 1; src_u += 1; @@ -1282,11 +1311,12 @@ void I444ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width; ++x) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); rgb_buf[3] = 255; src_y += 1; src_u += 1; @@ -1301,14 +1331,15 @@ void 
I422ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; @@ -1317,124 +1348,23 @@ void I422ToARGBRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void J422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvJPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void J422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - YuvJPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - } -} - -// TODO(fbarchard): replace with common matrix function. -void H422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvHPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void H422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - YuvHPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. 
- } - if (width & 1) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - } -} - - void I422ToRGB24Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); + rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); src_y += 2; src_u += 1; src_v += 1; @@ -1442,7 +1372,7 @@ void I422ToRGB24Row_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } @@ -1450,13 +1380,14 @@ void I422ToRAWRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); + rgb_buf + 5, rgb_buf + 4, rgb_buf + 3, yuvconstants); src_y += 2; src_u += 1; src_v += 1; @@ -1464,7 +1395,7 @@ void I422ToRAWRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); } } @@ -1472,6 +1403,7 @@ void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1481,8 +1413,8 @@ void I422ToARGB4444Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -1497,7 +1429,7 @@ void I422ToARGB4444Row_C(const uint8* src_y, dst_argb4444 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -1510,6 +1442,7 @@ void I422ToARGB1555Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1519,8 +1452,8 @@ void I422ToARGB1555Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -1535,7 +1468,7 @@ void I422ToARGB1555Row_C(const uint8* src_y, dst_argb1555 += 4; // Advance 2 pixels. 
} if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -1548,6 +1481,7 @@ void I422ToRGB565Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1557,8 +1491,8 @@ void I422ToRGB565Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1573,7 +1507,7 @@ void I422ToRGB565Row_C(const uint8* src_y, dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1585,20 +1519,21 @@ void I411ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 3; x += 4) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; YuvPixel(src_y[2], src_u[0], src_v[0], - rgb_buf + 8, rgb_buf + 9, rgb_buf + 10); + rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants); rgb_buf[11] = 255; YuvPixel(src_y[3], src_u[0], src_v[0], - rgb_buf + 12, rgb_buf + 13, rgb_buf + 14); + rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants); rgb_buf[15] = 255; src_y += 4; src_u += 1; @@ -1607,17 +1542,17 @@ void I411ToARGBRow_C(const uint8* src_y, } if (width & 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; rgb_buf += 8; // Advance 2 pixels. 
} if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } @@ -1625,14 +1560,15 @@ void I411ToARGBRow_C(const uint8* src_y, void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_uv[0], src_uv[1], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_uv += 2; @@ -1640,32 +1576,7 @@ void NV12ToARGBRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_uv[0], src_uv[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - - YuvPixel(src_y[1], src_vu[1], src_vu[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - - src_y += 2; - src_vu += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } @@ -1673,6 +1584,7 @@ void NV21ToARGBRow_C(const uint8* src_y, void NV12ToRGB565Row_C(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1682,8 +1594,8 @@ void NV12ToRGB565Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0); - YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1); + YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1697,42 +1609,7 @@ void NV12ToRGB565Row_C(const uint8* src_y, dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); - } -} - -void NV21ToRGB565Row_C(const uint8* src_y, - const uint8* vsrc_u, - uint8* dst_rgb565, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); - YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - b1 = b1 >> 3; - g1 = g1 >> 2; - r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); - src_y += 2; - vsrc_u += 2; - dst_rgb565 += 4; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1742,42 +1619,44 @@ void NV21ToRGB565Row_C(const uint8* src_y, void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_yuy2 += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_uyvy += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } @@ -1786,14 +1665,15 @@ void I422ToBGRARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); + rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants); rgb_buf[0] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 7, rgb_buf + 6, rgb_buf + 5); + rgb_buf + 7, rgb_buf + 6, rgb_buf + 5, yuvconstants); rgb_buf[4] = 255; src_y += 2; src_u += 1; @@ -1802,7 +1682,7 @@ void I422ToBGRARow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); + rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants); rgb_buf[0] = 255; } } @@ -1811,14 +1691,15 @@ void I422ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); + rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; @@ -1827,7 +1708,7 @@ void I422ToABGRRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); rgb_buf[3] = 255; } } @@ -1836,14 +1717,15 @@ void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, 
rgb_buf + 2, rgb_buf + 3); + rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 6, rgb_buf + 7); + rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants); rgb_buf[4] = 255; src_y += 2; src_u += 1; @@ -1852,7 +1734,7 @@ void I422ToRGBARow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); + rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; } } @@ -2401,374 +2283,10 @@ void I422ToUYVYRow_C(const uint8* src_y, } } -#define ANYYUV(NAMEANY, ANY_SIMD, YUVCONSTANTS) \ - void NAMEANY(const uint8* y_buf, \ - const uint8* u_buf, \ - const uint8* v_buf, \ - uint8* dst_argb, \ - int width) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_argb, &YUVCONSTANTS, width); \ - } - -#ifdef HAS_I422TOARGBMATRIXROW_NEON -ANYYUV(I422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvConstantsNEON) -ANYYUV(J422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvJConstantsNEON) -ANYYUV(H422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvHConstantsNEON) -#endif -#ifdef HAS_I422TOABGRMATRIXROW_NEON -ANYYUV(I422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvConstantsNEON) -ANYYUV(J422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvJConstantsNEON) -ANYYUV(H422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvHConstantsNEON) -#endif -#ifdef HAS_I422TOARGBMATRIXROW_SSSE3 -ANYYUV(I422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvConstants) -ANYYUV(J422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvJConstants) -ANYYUV(H422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvHConstants) -#endif -#ifdef HAS_I422TOARGBMATRIXROW_AVX2 -ANYYUV(I422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvConstants) -ANYYUV(J422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvJConstants) -ANYYUV(H422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvHConstants) -#endif -#ifdef HAS_I422TOABGRMATRIXROW_SSSE3 -ANYYUV(I422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvConstants) -ANYYUV(J422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvJConstants) -ANYYUV(H422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvHConstants) -#endif -#ifdef HAS_I422TOABGRMATRIXROW_AVX2 -ANYYUV(I422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvConstants) -ANYYUV(J422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvJConstants) -ANYYUV(H422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvHConstants) -#endif -// TODO(fbarchard): Neon, J444, H444 versions. -#ifdef HAS_I444TOARGBMATRIXROW_SSSE3 -ANYYUV(I444ToARGBRow_SSSE3, I444ToARGBMatrixRow_SSSE3, kYuvConstants) -#endif -#ifdef HAS_I444TOARGBMATRIXROW_AVX2 -ANYYUV(I444ToARGBRow_AVX2, I444ToARGBMatrixRow_AVX2, kYuvConstants) -#endif -#ifdef HAS_I444TOABGRMATRIXROW_SSSE3 -ANYYUV(I444ToABGRRow_SSSE3, I444ToABGRMatrixRow_SSSE3, kYuvConstants) -#endif -#ifdef HAS_I444TOABGRMATRIXROW_AVX2 -ANYYUV(I444ToABGRRow_AVX2, I444ToABGRMatrixRow_AVX2, kYuvConstants) -#endif - -// Maximum temporary width for wrappers to process at a time, in pixels. -#define MAXTWIDTH 2048 - -#if !(defined(_MSC_VER) && defined(_M_IX86)) && \ - defined(HAS_I422TORGB565ROW_SSSE3) -// row_win.cc has asm version, but GCC uses 2 step wrapper. -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width) { - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); - ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB1555ROW_SSSE3) -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); - ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb1555 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB4444ROW_SSSE3) -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); - ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb4444 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV12TORGB565ROW_SSSE3) -void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - NV12ToARGBRow_SSSE3(src_y, src_uv, row, twidth); - ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); - src_y += twidth; - src_uv += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV21TORGB565ROW_SSSE3) -void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - NV21ToARGBRow_SSSE3(src_y, src_vu, row, twidth); - ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); - src_y += twidth; - src_vu += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_YUY2TOARGBROW_SSSE3) -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth); - YUY2ToYRow_SSE2(src_yuy2, row_y, twidth); - I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth); - src_yuy2 += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif - -#if defined(HAS_UYVYTOARGBROW_SSSE3) -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth); - UYVYToYRow_SSE2(src_uyvy, row_y, twidth); - I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth); - src_uyvy += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif // !defined(LIBYUV_DISABLE_X86) - -#if defined(HAS_I422TORGB565ROW_AVX2) -void I422ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width) { - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB1555ROW_AVX2) -void I422ToARGB1555Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb1555 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB4444ROW_AVX2) -void I422ToARGB4444Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb4444 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TORGB24ROW_AVX2) -void I422ToRGB24Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - // TODO(fbarchard): ARGBToRGB24Row_AVX2 - ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb24 += twidth * 3; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TORAWROW_AVX2) -void I422ToRAWRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - // TODO(fbarchard): ARGBToRAWRow_AVX2 - ARGBToRAWRow_SSSE3(row, dst_raw, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_raw += twidth * 3; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV12TORGB565ROW_AVX2) -void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - NV12ToARGBRow_AVX2(src_y, src_uv, row, twidth); - ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); - src_y += twidth; - src_uv += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV21TORGB565ROW_AVX2) -void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - NV21ToARGBRow_AVX2(src_y, src_vu, row, twidth); - ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); - src_y += twidth; - src_vu += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_YUY2TOARGBROW_AVX2) -void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth); - YUY2ToYRow_AVX2(src_yuy2, row_y, twidth); - I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth); - src_yuy2 += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif - -#if defined(HAS_UYVYTOARGBROW_AVX2) -void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth); - UYVYToYRow_AVX2(src_uyvy, row_y, twidth); - I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth); - src_uyvy += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif // !defined(LIBYUV_DISABLE_X86) void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, + uint8* dst_argb, + const float* poly, int width) { int i; for (i = 0; i < width; ++i) { @@ -2868,6 +2386,311 @@ void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { } } +// Maximum temporary width for wrappers to process at a time, in pixels. +#define MAXTWIDTH 2048 + +#if !(defined(_MSC_VER) && defined(_M_IX86)) && \ + defined(HAS_I422TORGB565ROW_SSSE3) +// row_win.cc has asm version, but GCC uses 2 step wrapper. +void I422ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB1555ROW_SSSE3) +void I422ToARGB1555Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb1555, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb1555 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB4444ROW_SSSE3) +void I422ToARGB4444Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb4444 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV12TORGB565ROW_SSSE3) +void NV12ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); + ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); + src_y += twidth; + src_uv += twidth; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_YUY2TOARGBROW_SSSE3) +void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth); + YUY2ToYRow_SSE2(src_yuy2, row_y, twidth); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_yuy2 += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif + +#if defined(HAS_UYVYTOARGBROW_SSSE3) +void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth); + UYVYToYRow_SSE2(src_uyvy, row_y, twidth); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_uyvy += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif // !defined(LIBYUV_DISABLE_X86) + +#if defined(HAS_I422TORGB565ROW_AVX2) +void I422ToRGB565Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB1555ROW_AVX2) +void I422ToARGB1555Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb1555, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb1555 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB4444ROW_AVX2) +void I422ToARGB4444Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb4444 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TORGB24ROW_AVX2) +void I422ToRGB24Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + // TODO(fbarchard): ARGBToRGB24Row_AVX2 + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TORAWROW_AVX2) +void I422ToRAWRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + // TODO(fbarchard): ARGBToRAWRow_AVX2 + ARGBToRAWRow_SSSE3(row, dst_raw, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_raw += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV12TORGB565ROW_AVX2) +void NV12ToRGB565Row_AVX2(const uint8* src_y, + const uint8* src_uv, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); + ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); + src_y += twidth; + src_uv += twidth; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_YUY2TOARGBROW_AVX2) +void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth); + YUY2ToYRow_AVX2(src_yuy2, row_y, twidth); + I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_yuy2 += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif + +#if defined(HAS_UYVYTOARGBROW_AVX2) +void UYVYToARGBRow_AVX2(const uint8* src_uyvy, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth); + UYVYToYRow_AVX2(src_uyvy, row_y, twidth); + I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_uyvy += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif // !defined(LIBYUV_DISABLE_X86) + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 949750321..af5ca2b52 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1350,23 +1350,23 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "punpcklwd %%xmm0,%%xmm0 \n" // Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB(YuvConstants) \ +#define YUVTORGB(yuvconstants) \ "movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm3 \n" \ - "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \ - "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \ + "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ + "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ "psubw %%xmm1,%%xmm0 \n" \ - "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \ - "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \ + "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ + "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ "psubw %%xmm2,%%xmm1 \n" \ - "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \ - "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \ + "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ + "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ "psubw %%xmm3,%%xmm2 \n" \ "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ "punpcklbw %%xmm3,%%xmm3 \n" \ - "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \ + "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \ "paddsw %%xmm3,%%xmm0 \n" \ "paddsw %%xmm3,%%xmm1 \n" \ "paddsw %%xmm3,%%xmm2 \n" \ @@ -1423,19 +1423,19 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" -void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - struct 
YuvConstants* YuvConstants, - int width) { +void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV444 - YUVTORGB(YuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1444,25 +1444,25 @@ void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV444 - YUVTORGB(YuvConstants) + YUVTORGB(yuvconstants) STOREABGR "sub $0x8,%[width] \n" "jg 1b \n" @@ -1471,7 +1471,7 @@ void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] [width]"+rm"(width) // %[width] - : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1482,6 +1482,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" @@ -1490,7 +1491,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) "punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" @@ -1514,7 +1515,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, #else [width]"+rm"(width) // %[width] #endif - : [kYuvConstants]"r"(&kYuvConstants.kUVToB), + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) : "memory", "cc", NACL_R14 @@ -1526,6 +1527,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" @@ -1534,7 +1536,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) "punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" @@ -1558,7 +1560,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, #else [width]"+rm"(width) // %[width] #endif - : [kYuvConstants]"r"(&kYuvConstants.kUVToB), + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) : "memory", "cc", NACL_R14 @@ -1566,19 +1568,19 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, ); } -void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, 
- struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV422 - YUVTORGB(YuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1587,7 +1589,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1597,6 +1599,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1604,7 +1607,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV411 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1613,7 +1616,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1622,13 +1625,14 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READNV12 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1636,30 +1640,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, [uv_buf]"+r"(uv_buf), // %[uv_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); -} - -void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READNV12 - YUVTORGB(kYuvConstants) - STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [uv_buf]"+r"(uv_buf), // %[uv_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] // Does not use r14. 
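// --- Editor's sketch (not part of this patch) --------------------------------
// The hunks above replace the *MatrixRow* variants with row functions that take
// a struct YuvConstants* parameter, so a single SSSE3 row can serve several
// colorspaces, and the deleted NV21ToARGBRow_SSSE3 (which baked in
// kYvuConstants) becomes the NV12 row called with the VU-swapped table. The two
// helpers below are invented purely for illustration; they assume libyuv/row.h
// is included and that width is a multiple of 8, since the SSSE3 loops emit
// 8 pixels per pass.
static void J422ToARGBLine(const uint8* y, const uint8* u, const uint8* v,
                           uint8* dst_argb, int width) {
  // Full-range JPEG 4:2:2: same code path as I422, different coefficients.
  I422ToARGBRow_SSSE3(y, u, v, dst_argb, &kYuvJConstants, width);
}
static void NV21ToARGBLine(const uint8* y, const uint8* vu,
                           uint8* dst_argb, int width) {
  // NV21 is NV12 with V/U interleaving swapped; kYvuConstants swaps it back.
  NV12ToARGBRow_SSSE3(y, vu, dst_argb, &kYvuConstants, width);
}
// This is also why the ANYYUV(...) wrapper macro and its instantiations are
// deleted earlier in this change: binding a constants table to a function name
// is no longer needed once the table is an argument.
// ------------------------------------------------------------------------------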
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1669,6 +1650,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1676,7 +1658,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREBGRA "sub $0x8,%[width] \n" "jg 1b \n" @@ -1685,25 +1667,25 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREABGR "sub $0x8,%[width] \n" "jg 1b \n" @@ -1712,7 +1694,7 @@ void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1722,6 +1704,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1729,7 +1712,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STORERGBA "sub $0x8,%[width] \n" "jg 1b \n" @@ -1738,7 +1721,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1788,6 +1771,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1795,7 +1779,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into BGRA "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB @@ -1816,29 +1800,29 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } #endif // HAS_I422TOBGRAROW_AVX2 -#if defined(HAS_I422TOARGBMATRIXROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 
16 Y producing 16 ARGB (64 bytes). -void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into ARGB "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG @@ -1859,29 +1843,29 @@ void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -#endif // HAS_I422TOARGBMATRIXROW_AVX2 +#endif // HAS_I422TOARGBROW_AVX2 #if defined(HAS_I422TOABGRROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). -void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into ABGR "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG @@ -1901,7 +1885,7 @@ void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1915,6 +1899,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1922,7 +1907,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into RGBA "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" @@ -1942,7 +1927,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); diff --git a/source/row_mips.cc b/source/row_mips.cc index 1183c7183..0720110b6 100644 --- a/source/row_mips.cc +++ b/source/row_mips.cc @@ -593,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, // t8 = | 0 | G1 | 0 | g1 | // t2 = | 0 | R0 | 0 | r0 | // t1 = | 0 | R1 | 0 | r1 | -#define I422ToTransientMipsRGB \ +#define YUVTORGB \ "lw $t0, 0(%[y_buf]) \n" \ "lhu $t1, 0(%[u_buf]) \n" \ "lhu $t2, 0(%[v_buf]) \n" \ @@ -652,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, "addu.ph $t2, $t2, $s5 \n" \ "addu.ph 
$t1, $t1, $s5 \n" +// TODO(fbarchard): accept yuv conversion constants. void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { __asm__ __volatile__ ( ".set push \n" @@ -671,7 +673,7 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff| "1: \n" - I422ToTransientMipsRGB + YUVTORGB // Arranging into argb format "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1| "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0| @@ -717,6 +719,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { __asm__ __volatile__ ( ".set push \n" @@ -732,7 +735,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, "ori $s6, 0xff00 \n" // |ff|00|ff|00| "1: \n" - I422ToTransientMipsRGB + YUVTORGB // Arranging into abgr format "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1| "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0| @@ -778,6 +781,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { __asm__ __volatile__ ( ".set push \n" @@ -793,7 +797,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, "ori $s6, 0xff \n" // |00|ff|00|ff| "1: \n" - I422ToTransientMipsRGB + YUVTORGB // Arranging into bgra format "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1| "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0| diff --git a/source/row_neon.cc b/source/row_neon.cc index f7e6ba1dc..9052ed043 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -93,7 +93,7 @@ extern "C" { "vuzp.u8 d2, d3 \n" \ "vtrn.u32 d2, d3 \n" -#define YUV422TORGB_SETUP_REG \ +#define YUVTORGB_SETUP \ MEMACCESS([kUVToRB]) \ "vld1.8 {d24}, [%[kUVToRB]] \n" \ MEMACCESS([kUVToG]) \ @@ -107,7 +107,7 @@ extern "C" { MEMACCESS([kYToRgb]) \ "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n" -#define YUV422TORGB \ +#define YUVTORGB \ "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\ "vmull.u8 q9, d2, d25 \n" /* u/v G component */\ "vmovl.u8 q0, d0 \n" /* Y */\ @@ -138,12 +138,13 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV444 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(3) @@ -154,26 +155,26 @@ void I444ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } -void I422ToARGBMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstantsNEON* YuvConstants, - int width) { +void I422ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(3) 
@@ -184,10 +185,10 @@ void I422ToARGBMatrixRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5 - [kUVToG]"r"(&YuvConstants->kUVToG), // %6 - [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR), - [kYToRgb]"r"(&YuvConstants->kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -197,12 +198,13 @@ void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV411 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(3) @@ -213,10 +215,10 @@ void I411ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -226,12 +228,13 @@ void I422ToBGRARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vswp.u8 d20, d22 \n" "vmov.u8 d19, #255 \n" @@ -243,26 +246,26 @@ void I422ToBGRARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_bgra), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } -void I422ToABGRMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - struct YuvConstantsNEON* YuvConstants, - int width) { +void I422ToABGRRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vswp.u8 d20, d22 \n" "vmov.u8 d23, #255 \n" @@ -274,10 +277,10 @@ void I422ToABGRMatrixRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_abgr), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5 - [kUVToG]"r"(&YuvConstants->kUVToG), // %6 - [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR), - [kYToRgb]"r"(&YuvConstants->kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -287,12 +290,13 @@ void I422ToRGBARow_NEON(const uint8* 
src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d19, #255 \n" MEMACCESS(3) @@ -303,10 +307,10 @@ void I422ToRGBARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgba), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -316,12 +320,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" MEMACCESS(3) "vst3.8 {d20, d21, d22}, [%3]! \n" @@ -331,10 +336,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgb24), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -344,12 +349,13 @@ void I422ToRAWRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vswp.u8 d20, d22 \n" MEMACCESS(3) @@ -360,10 +366,10 @@ void I422ToRAWRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_raw), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -385,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" ARGBTORGB565 MEMACCESS(3) @@ -401,10 +408,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgb565), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", 
"q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -429,12 +436,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" ARGBTOARGB1555 @@ -446,10 +454,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb1555), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -468,13 +476,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" ARGBTOARGB4444 @@ -486,10 +495,10 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb4444), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -499,10 +508,10 @@ void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV400 - YUV422TORGB + YUVTORGB "subs %2, %2, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(1) @@ -511,10 +520,10 @@ void I400ToARGBRow_NEON(const uint8* src_y, : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&kYuvConstants.kUVToRB), + [kUVToG]"r"(&kYuvConstants.kUVToG), + [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -545,12 +554,13 @@ void J400ToARGBRow_NEON(const uint8* src_y, void NV12ToARGBRow_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READNV12 - YUV422TORGB + YUVTORGB "subs %3, %3, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(2) @@ -560,37 +570,10 @@ void NV12ToARGBRow_NEON(const uint8* src_y, "+r"(src_uv), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - 
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width) { - asm volatile ( - YUV422TORGB_SETUP_REG - "1: \n" - READNV21 - YUV422TORGB - "subs %3, %3, #8 \n" - "vmov.u8 d23, #255 \n" - MEMACCESS(2) - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -599,12 +582,13 @@ void NV21ToARGBRow_NEON(const uint8* src_y, void NV12ToRGB565Row_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READNV12 - YUV422TORGB + YUVTORGB "subs %3, %3, #8 \n" ARGBTORGB565 MEMACCESS(2) @@ -614,37 +598,10 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, "+r"(src_uv), // %1 "+r"(dst_rgb565), // %2 "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { - asm volatile ( - YUV422TORGB_SETUP_REG - "1: \n" - READNV21 - YUV422TORGB - "subs %3, %3, #8 \n" - ARGBTORGB565 - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 
- "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_rgb565), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -652,12 +609,13 @@ void NV21ToRGB565Row_NEON(const uint8* src_y, void YUY2ToARGBRow_NEON(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUY2 - YUV422TORGB + YUVTORGB "subs %2, %2, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(1) @@ -666,10 +624,10 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, : "+r"(src_yuy2), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -677,12 +635,13 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, void UYVYToARGBRow_NEON(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READUYVY - YUV422TORGB + YUVTORGB "subs %2, %2, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(1) @@ -691,10 +650,10 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, : "+r"(src_uyvy), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 668baef77..ae7b32cbf 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -91,7 +91,8 @@ extern "C" { "uzp2 v3.8b, v2.8b, v2.8b \n" \ "ins v1.s[1], v3.s[0] \n" -#define YUV422TORGB_SETUP_REG \ +// TODO(fbarchard): replace movi with constants from struct. 
+#define YUVTORGB_SETUP \ "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ @@ -101,7 +102,7 @@ extern "C" { "movi v29.8h, #25 \n" \ "movi v30.8h, #52 \n" -#define YUV422TORGB(vR, vG, vB) \ +#define YUVTORGB(vR, vG, vB) \ "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \ @@ -143,12 +144,13 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV444 - YUV422TORGB(v22, v21, v20) + YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" /* A */ MEMACCESS(3) @@ -159,8 +161,8 @@ void I444ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -173,12 +175,13 @@ void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB(v22, v21, v20) + YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" /* A */ MEMACCESS(3) @@ -189,8 +192,8 @@ void I422ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -202,12 +205,13 @@ void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV411 - YUV422TORGB(v22, v21, v20) + YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" /* A */ MEMACCESS(3) @@ -218,8 +222,8 @@ void I411ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -231,12 +235,13 @@ void I422ToBGRARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB(v21, v22, v23) + YUVTORGB(v21, v22, v23) "subs %w4, %w4, #8 \n" "movi v20.8b, #255 \n" /* A */ MEMACCESS(3) @@ -247,8 +252,8 @@ void I422ToBGRARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_bgra), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + 
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -261,12 +266,13 @@ void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v20, v21, v22)
+ YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -277,8 +283,8 @@ void I422ToABGRRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -290,12 +296,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v23, v22, v21)
+ YUVTORGB(v23, v22, v21)
"subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -306,8 +313,8 @@ void I422ToRGBARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -319,12 +326,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@@ -334,8 +342,8 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -347,12 +355,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v20, v21, v22)
+ YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@@ -362,8 +371,8 @@ void I422ToRAWRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_raw), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -382,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@@ -398,8 +408,8 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -420,12 +430,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB1555
@@ -437,8 +448,8 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -460,13 +471,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"movi v4.16b, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB4444
@@ -478,8 +490,8 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -492,10 +504,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV400
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -504,8 +516,8 @@ void I400ToARGBRow_NEON(const uint8* src_y,
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -540,12 +552,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(2)
@@ -555,51 +568,25 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV21TOARGBROW_NEON
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB(v22, v21, v20)
- "subs %w3, %w3, #8 \n"
- "movi v23.8b, #255 \n"
- MEMACCESS(2)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TOARGBROW_NEON
-
#ifdef HAS_NV12TORGB565ROW_NEON
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@@ -609,51 +596,25 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TORGB565ROW_NEON
-#ifdef HAS_NV21TORGB565ROW_NEON
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB(v22, v21, v20)
- "subs %w3, %w3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TORGB565ROW_NEON
-
#ifdef HAS_YUY2TOARGBROW_NEON
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUY2
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -662,8 +623,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -673,13 +634,14 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
#ifdef HAS_UYVYTOARGBROW_NEON
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READUYVY
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -688,8 +650,8 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
diff --git a/source/row_win.cc b/source/row_win.cc
index f7a310520..f09d2a75b 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -83,13 +83,13 @@ extern "C" {
dst_argb += 32;
-#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+#if defined(HAS_I422TOARGBROW_SSSE3)
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@@ -102,13 +102,13 @@ void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
}
#endif
-#if defined(HAS_I422TOABGRMATRIXROW_SSSE3)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+#if defined(HAS_I422TOABGRROW_SSSE3)
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@@ -1963,16 +1963,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
__asm lea edx, [edx + 64] \
}
-#ifdef HAS_I422TOARGBMATRIXROW_AVX2
+#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
-void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2001,18 +2001,18 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
-#endif // HAS_I422TOARGBMATRIXROW_AVX2
+#endif // HAS_I422TOARGBROW_AVX2
-#ifdef HAS_I444TOARGBMATRIXROW_AVX2
+#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
-void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2040,18 +2040,18 @@ void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
-#endif // HAS_I444TOARGBMATRIXROW_AVX2
+#endif // HAS_I444TOARGBROW_AVX2
-#ifdef HAS_I444TOABGRMATRIXROW_AVX2
+#ifdef HAS_I444TOABGRROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
-void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2079,7 +2079,7 @@ void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
-#endif // HAS_I444TOABGRMATRIXROW_AVX2
+#endif // HAS_I444TOABGRROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
@@ -2089,26 +2089,30 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV411_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2124,23 +2128,27 @@ __declspec(naked)
void NV12ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebp
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebp, [esp + 8 + 16] // YuvConstants
+ mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READNV12_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop esi
vzeroupper
ret
@@ -2148,37 +2156,6 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
}
#endif // HAS_NV12TOARGBROW_AVX2
-#ifdef HAS_NV21TOARGBROW_AVX2
-// 16 pixels.
-// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)
-void NV21ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READNV12_AVX2
- YUVTORGB_AVX2(kYvuConstants)
- STOREARGB_AVX2
-
- sub ecx, 16
- jg convertloop
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_NV21TOARGBROW_AVX2
-
#ifdef HAS_I422TOBGRAROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
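The NV21 kernels deleted above were simply the NV12 read macros followed by YUVTORGB with kYvuConstants, that is, NV12 with the chroma constants swapped. Once the constants travel as an argument, the surviving NV12 kernels can cover NV21 as well. A minimal sketch of such a wrapper (hypothetical, not part of this patch; it assumes NV12ToARGBRow_AVX2 and kYvuConstants are visible through row.h):

// Hypothetical helper for illustration only: NV21 is NV12 with V/U order swapped,
// so the VU-swapped kYvuConstants table reproduces the removed NV21 path.
static void NV21ToARGBRow_ViaNV12_AVX2(const uint8* src_y,
                                       const uint8* src_vu,
                                       uint8* dst_argb,
                                       int width) {
  // width is assumed to be a multiple of 16 here (the AVX2 kernel's step).
  NV12ToARGBRow_AVX2(src_y, src_vu, dst_argb, &kYvuConstants, width);
}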
@@ -2188,26 +2165,30 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREBGRA_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2224,26 +2205,30 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STORERGBA_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2256,12 +2241,12 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
-void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2481,12 +2466,12 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
-void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2518,12 +2503,12 @@ void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
__declspec(naked)
-void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2559,27 +2544,31 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb24
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgb24
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGB24
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2593,27 +2582,31 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // raw
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // raw
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERAW
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2627,15 +2620,18 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb565_buf,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb565
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgb565
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
psrld xmm5, 27
@@ -2647,12 +2643,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGB565
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2662,12 +2659,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2704,30 +2701,32 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
- push ebx
push esi
push edi
+ push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
- READYUV411 // modifies EBX
- YUVTORGB(kYuvConstants)
+ READYUV411
+ YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
- pop ebx
ret
}
}
@@ -2738,51 +2737,27 @@ __declspec(naked)
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebp
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebp, [esp + 8 + 16] // YuvConstants
+ mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READNV12
- YUVTORGB(kYuvConstants)
- STOREARGB
-
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels.
-// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READNV12
- YUVTORGB(kYvuConstants)
+ YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
+ pop ebp
pop esi
ret
}
}
@@ -2793,25 +2768,29 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // bgra
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STOREBGRA
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2819,12 +2798,12 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
}
__declspec(naked)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2858,31 +2837,34 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgba
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGBA
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
}
}
-
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I400TOARGBROW_SSE2
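With the Matrix variants folded into the plain row functions above, the x86 row kernels now select their colorspace through the YuvConstants argument rather than a hard-coded table. A minimal caller sketch (hypothetical wrapper, not part of this patch; it assumes the row.h declarations, including kYuvConstants, are in scope):

// Hypothetical wrapper for illustration only: converts one I422 row to ARGB using
// the default kYuvConstants table used throughout this patch; passing a different
// YuvConstants* selects a different colorspace without changing the kernel.
static void ConvertI422RowToARGB(const uint8* y, const uint8* u, const uint8* v,
                                 uint8* dst_argb, int width) {
  // width is assumed to be a multiple of 8 here (the SSSE3 kernel's step).
  I422ToARGBRow_SSSE3(y, u, v, dst_argb, &kYuvConstants, width);
}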