Pass yuvconstants to all YUV to RGB conversion functions.

R=harryjin@google.com
BUG=libyuv:488
Review URL: https://codereview.chromium.org/1363503002 .
2026-01-01 03:12:16 +08:00 · 2015-09-22 10:26:03 -07:00 · 2015-09-22 10:26:03 -07:00 · f96890a0be
commit f96890a0be
parent 62c49dc811
11 changed files with 1522 additions and 1811 deletions
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@ -56,6 +56,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I444ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
@ -103,7 +104,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I444ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -124,6 +125,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I444ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
@ -171,7 +173,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I444ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -192,6 +194,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
@ -248,7 +251,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -269,6 +272,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I411ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
@ -316,7 +320,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -338,6 +342,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToARGBRow_C;
  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
      ARGBCopyYToAlphaRow_C;
@ -436,7 +441,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
    ARGBCopyYToAlphaRow(src_a, dst_argb, width);
    ARGBAttenuateRow(dst_argb, dst_argb, width);
    dst_argb += dst_stride_argb;
@ -462,6 +467,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToABGRRow_C;
  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
      ARGBCopyYToAlphaRow_C;
@ -560,7 +566,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
    ARGBCopyYToAlphaRow(src_a, dst_abgr, width);
    ARGBAttenuateRow(dst_abgr, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
@ -639,7 +645,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
+  void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
      J400ToARGBRow_C;
  if (!src_y || !dst_argb ||
      width <= 0 || height == 0) {
@ -766,7 +772,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height) {
  int y;
-  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RGB24ToARGBRow_C;
  if (!src_rgb24 || !dst_argb ||
      width <= 0 || height == 0) {
@ -816,7 +822,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  int y;
-  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RAWToARGBRow_C;
  if (!src_raw || !dst_argb ||
      width <= 0 || height == 0) {
@ -866,7 +872,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
-  void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
+  void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
      RGB565ToARGBRow_C;
  if (!src_rgb565 || !dst_argb ||
      width <= 0 || height == 0) {
@ -925,7 +931,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
                   int width, int height) {
  int y;
  void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
-      int pix) = ARGB1555ToARGBRow_C;
+      int width) = ARGB1555ToARGBRow_C;
  if (!src_argb1555 || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@ -983,7 +989,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
                   int width, int height) {
  int y;
  void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
-      int pix) = ARGB4444ToARGBRow_C;
+      int width) = ARGB4444ToARGBRow_C;
  if (!src_argb4444 || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@ -1044,6 +1050,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
  void (*NV12ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = NV12ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb ||
      width <= 0 || height == 0) {
@ -1081,7 +1088,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    NV12ToARGBRow(src_y, src_uv, dst_argb, width);
+    NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@ -1098,10 +1105,11 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*NV21ToARGBRow)(const uint8* y_buf,
+  void (*NV12ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
-                        int width) = NV21ToARGBRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = NV12ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@ -1112,33 +1120,33 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
-#if defined(HAS_NV21TOARGBROW_SSSE3)
+#if defined(HAS_NV12TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
+    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+      NV12ToARGBRow = NV12ToARGBRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_NV21TOARGBROW_AVX2)
+#if defined(HAS_NV12TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
+    NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      NV21ToARGBRow = NV21ToARGBRow_AVX2;
+      NV12ToARGBRow = NV12ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_NV21TOARGBROW_NEON)
+#if defined(HAS_NV12TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
+    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      NV21ToARGBRow = NV21ToARGBRow_NEON;
+      NV12ToARGBRow = NV12ToARGBRow_NEON;
    }
  }
 #endif

  for (y = 0; y < height; ++y) {
-    NV21ToARGBRow(src_y, src_uv, dst_argb, width);
+    NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@ -1157,6 +1165,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
  void (*NV12ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = NV12ToARGBRow_C;
  if (!src_m420 || !dst_argb ||
      width <= 0 || height == 0) {
@ -1194,14 +1203,16 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
 #endif

  for (y = 0; y < height - 1; y += 2) {
-    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+                  &kYuvConstants, width);
    NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
-                  dst_argb + dst_stride_argb, width);
+                  dst_argb + dst_stride_argb, &kYuvConstants, width);
    dst_argb += dst_stride_argb * 2;
    src_m420 += src_stride_m420 * 3;
  }
  if (height & 1) {
-    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+                  &kYuvConstants, width);
  }
  return 0;
 }
@ -1212,7 +1223,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
+  void (*YUY2ToARGBRow)(const uint8* src_yuy2,
+                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
+                        int width) =
      YUY2ToARGBRow_C;
  if (!src_yuy2 || !dst_argb ||
      width <= 0 || height == 0) {
@ -1256,7 +1270,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
  }
 #endif
  for (y = 0; y < height; ++y) {
-    YUY2ToARGBRow(src_yuy2, dst_argb, width);
+    YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvConstants, width);
    src_yuy2 += src_stride_yuy2;
    dst_argb += dst_stride_argb;
  }
@ -1269,7 +1283,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
+  void (*UYVYToARGBRow)(const uint8* src_uyvy,
+                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
+                        int width) =
      UYVYToARGBRow_C;
  if (!src_uyvy || !dst_argb ||
      width <= 0 || height == 0) {
@ -1313,7 +1330,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
  }
 #endif
  for (y = 0; y < height; ++y) {
-    UYVYToARGBRow(src_uyvy, dst_argb, width);
+    UYVYToARGBRow(src_uyvy, dst_argb, &kYuvConstants, width);
    src_uyvy += src_stride_uyvy;
    dst_argb += dst_stride_argb;
  }
@ -1328,11 +1345,12 @@ int J420ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*J422ToARGBRow)(const uint8* y_buf,
+  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = J422ToARGBRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToARGBRow_C;
  if (!src_y || !src_u || !src_v || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@ -1343,42 +1361,42 @@ int J420ToARGB(const uint8* src_y, int src_stride_y,
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
-#if defined(HAS_J422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_SSSE3;
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_J422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToARGBRow = J422ToARGBRow_Any_AVX2;
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      J422ToARGBRow = J422ToARGBRow_AVX2;
+      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_J422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToARGBRow = J422ToARGBRow_Any_NEON;
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_NEON;
+      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
 #endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@ -1397,11 +1415,12 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*J422ToARGBRow)(const uint8* y_buf,
+  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = J422ToARGBRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
      width <= 0 || height == 0) {
@ -1422,42 +1441,42 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
  }
-#if defined(HAS_J422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_SSSE3;
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_J422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToARGBRow = J422ToARGBRow_Any_AVX2;
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      J422ToARGBRow = J422ToARGBRow_AVX2;
+      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_J422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToARGBRow = J422ToARGBRow_Any_NEON;
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_NEON;
+      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
 #endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -1474,11 +1493,12 @@ int J420ToABGR(const uint8* src_y, int src_stride_y,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
-  void (*J422ToABGRRow)(const uint8* y_buf,
+  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = J422ToABGRRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v || !dst_abgr ||
      width <= 0 || height == 0) {
    return -1;
@ -1489,42 +1509,42 @@ int J420ToABGR(const uint8* src_y, int src_stride_y,
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
-#if defined(HAS_J422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
+    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      J422ToABGRRow = J422ToABGRRow_SSSE3;
+      I422ToABGRRow = I422ToABGRRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_J422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToABGRRow = J422ToABGRRow_Any_AVX2;
+    I422ToABGRRow = I422ToABGRRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      J422ToABGRRow = J422ToABGRRow_AVX2;
+      I422ToABGRRow = I422ToABGRRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_J422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToABGRRow = J422ToABGRRow_Any_NEON;
+    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      J422ToABGRRow = J422ToABGRRow_NEON;
+      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
 #endif
-#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
-    J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
+    I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    if (y & 1) {
@ -1543,11 +1563,12 @@ int J422ToABGR(const uint8* src_y, int src_stride_y,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
-  void (*J422ToABGRRow)(const uint8* y_buf,
+  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = J422ToABGRRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
      width <= 0 || height == 0) {
@ -1568,42 +1589,42 @@ int J422ToABGR(const uint8* src_y, int src_stride_y,
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
  }
-#if defined(HAS_J422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
+    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      J422ToABGRRow = J422ToABGRRow_SSSE3;
+      I422ToABGRRow = I422ToABGRRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_J422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToABGRRow = J422ToABGRRow_Any_AVX2;
+    I422ToABGRRow = I422ToABGRRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      J422ToABGRRow = J422ToABGRRow_AVX2;
+      I422ToABGRRow = I422ToABGRRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_J422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToABGRRow = J422ToABGRRow_Any_NEON;
+    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      J422ToABGRRow = J422ToABGRRow_NEON;
+      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
 #endif
-#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
-    J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
+    I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -1620,11 +1641,12 @@ int H420ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*H422ToARGBRow)(const uint8* y_buf,
+  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = H422ToARGBRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToARGBRow_C;
  if (!src_y || !src_u || !src_v || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@ -1635,42 +1657,42 @@ int H420ToARGB(const uint8* src_y, int src_stride_y,
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
-#if defined(HAS_H422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      H422ToARGBRow = H422ToARGBRow_SSSE3;
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_H422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    H422ToARGBRow = H422ToARGBRow_Any_AVX2;
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      H422ToARGBRow = H422ToARGBRow_AVX2;
+      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_H422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    H422ToARGBRow = H422ToARGBRow_Any_NEON;
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      H422ToARGBRow = H422ToARGBRow_NEON;
+      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
 #endif
-#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@ -1689,11 +1711,12 @@ int H422ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*H422ToARGBRow)(const uint8* y_buf,
+  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = H422ToARGBRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
      width <= 0 || height == 0) {
@ -1714,42 +1737,42 @@ int H422ToARGB(const uint8* src_y, int src_stride_y,
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
  }
-#if defined(HAS_H422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      H422ToARGBRow = H422ToARGBRow_SSSE3;
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_H422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    H422ToARGBRow = H422ToARGBRow_Any_AVX2;
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      H422ToARGBRow = H422ToARGBRow_AVX2;
+      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_H422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    H422ToARGBRow = H422ToARGBRow_Any_NEON;
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      H422ToARGBRow = H422ToARGBRow_NEON;
+      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
 #endif
-#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -1766,11 +1789,12 @@ int H420ToABGR(const uint8* src_y, int src_stride_y,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
-  void (*H422ToABGRRow)(const uint8* y_buf,
+  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = H422ToABGRRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v || !dst_abgr ||
      width <= 0 || height == 0) {
    return -1;
@ -1781,42 +1805,42 @@ int H420ToABGR(const uint8* src_y, int src_stride_y,
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
-#if defined(HAS_H422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
+    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      H422ToABGRRow = H422ToABGRRow_SSSE3;
+      I422ToABGRRow = I422ToABGRRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_H422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    H422ToABGRRow = H422ToABGRRow_Any_AVX2;
+    I422ToABGRRow = I422ToABGRRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      H422ToABGRRow = H422ToABGRRow_AVX2;
+      I422ToABGRRow = I422ToABGRRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_H422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    H422ToABGRRow = H422ToABGRRow_Any_NEON;
+    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      H422ToABGRRow = H422ToABGRRow_NEON;
+      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
 #endif
-#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
-    H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
+    I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    if (y & 1) {
@ -1835,11 +1859,12 @@ int H422ToABGR(const uint8* src_y, int src_stride_y,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
-  void (*H422ToABGRRow)(const uint8* y_buf,
+  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
-                        int width) = H422ToABGRRow_C;
+                        struct YuvConstants* yuvconstants,
+                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
      width <= 0 || height == 0) {
@ -1860,42 +1885,42 @@ int H422ToABGR(const uint8* src_y, int src_stride_y,
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
  }
-#if defined(HAS_H422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
+    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      H422ToABGRRow = H422ToABGRRow_SSSE3;
+      I422ToABGRRow = I422ToABGRRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_H422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    H422ToABGRRow = H422ToABGRRow_Any_AVX2;
+    I422ToABGRRow = I422ToABGRRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      H422ToABGRRow = H422ToABGRRow_AVX2;
+      I422ToABGRRow = I422ToABGRRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_H422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    H422ToABGRRow = H422ToABGRRow_Any_NEON;
+    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      H422ToABGRRow = H422ToABGRRow_NEON;
+      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
 #endif
-#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
-    H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
+    I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
  }
 #endif

  for (y = 0; y < height; ++y) {
-    H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@ -462,6 +462,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToARGBRow_C;
  if (!src_y || !src_u || !src_v || !dst_argb ||
      width <= 0 || height == 0) {
@ -508,7 +509,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@ -531,6 +532,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToBGRARow_C;
  if (!src_y || !src_u || !src_v || !dst_bgra ||
      width <= 0 || height == 0) {
@ -577,7 +579,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
+    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    if (y & 1) {
@ -600,6 +602,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v || !dst_abgr ||
      width <= 0 || height == 0) {
@ -637,7 +640,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    if (y & 1) {
@ -660,6 +663,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v || !dst_rgba ||
      width <= 0 || height == 0) {
@ -697,7 +701,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    if (y & 1) {
@ -720,6 +724,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
+                         struct YuvConstants* yuvconstants,
                         int width) = I422ToRGB24Row_C;
  if (!src_y || !src_u || !src_v || !dst_rgb24 ||
      width <= 0 || height == 0) {
@ -757,7 +762,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
+    I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvConstants, width);
    dst_rgb24 += dst_stride_rgb24;
    src_y += src_stride_y;
    if (y & 1) {
@ -780,6 +785,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
                       const uint8* u_buf,
                       const uint8* v_buf,
                       uint8* rgb_buf,
+                       struct YuvConstants* yuvconstants,
                       int width) = I422ToRAWRow_C;
  if (!src_y || !src_u || !src_v || !dst_raw ||
      width <= 0 || height == 0) {
@ -817,7 +823,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
+    I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvConstants, width);
    dst_raw += dst_stride_raw;
    src_y += src_stride_y;
    if (y & 1) {
@ -840,6 +846,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
                            const uint8* u_buf,
                            const uint8* v_buf,
                            uint8* rgb_buf,
+                            struct YuvConstants* yuvconstants,
                            int width) = I422ToARGB1555Row_C;
  if (!src_y || !src_u || !src_v || !dst_argb1555 ||
      width <= 0 || height == 0) {
@ -877,7 +884,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
+    I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvConstants, width);
    dst_argb1555 += dst_stride_argb1555;
    src_y += src_stride_y;
    if (y & 1) {
@ -901,6 +908,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
                            const uint8* u_buf,
                            const uint8* v_buf,
                            uint8* rgb_buf,
+                            struct YuvConstants* yuvconstants,
                            int width) = I422ToARGB4444Row_C;
  if (!src_y || !src_u || !src_v || !dst_argb4444 ||
      width <= 0 || height == 0) {
@ -938,7 +946,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
+    I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvConstants, width);
    dst_argb4444 += dst_stride_argb4444;
    src_y += src_stride_y;
    if (y & 1) {
@ -961,6 +969,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
                          const uint8* u_buf,
                          const uint8* v_buf,
                          uint8* rgb_buf,
+                          struct YuvConstants* yuvconstants,
                          int width) = I422ToRGB565Row_C;
  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
      width <= 0 || height == 0) {
@ -998,7 +1007,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
+    I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvConstants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
@ -1029,6 +1038,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToARGBRow_C;
  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
      const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
@ -1105,7 +1115,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
    // Allocate a row of argb.
    align_buffer_64(row_argb, width * 4);
    for (y = 0; y < height; ++y) {
-      I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
+      I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvConstants, width);
      ARGBToRGB565DitherRow(row_argb, dst_rgb565,
                            *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
      dst_rgb565 += dst_stride_rgb565;
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@ -287,9 +287,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
               int width, int height) {
  int y;
  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix) =
+                         uint8* dst_u, uint8* dst_v, int width) =
      YUY2ToUV422Row_C;
-  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
+  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
      YUY2ToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
@ -359,10 +359,10 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
               int width, int height) {
  int y;
  void (*UYVYToUV422Row)(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix) =
+                         uint8* dst_u, uint8* dst_v, int width) =
      UYVYToUV422Row_C;
  void (*UYVYToYRow)(const uint8* src_uyvy,
-                     uint8* dst_y, int pix) = UYVYToYRow_C;
+                     uint8* dst_y, int width) = UYVYToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
@ -790,6 +790,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToBGRARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_bgra ||
@ -846,7 +847,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
+    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -867,6 +868,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
@ -914,7 +916,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -935,6 +937,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        struct YuvConstants* yuvconstants,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_rgba ||
@ -982,7 +985,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
@ -1001,6 +1004,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
  void (*NV12ToRGB565Row)(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* rgb_buf,
+                          struct YuvConstants* yuvconstants,
                          int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 ||
      width <= 0 || height == 0) {
@ -1038,7 +1042,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
+    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvConstants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
@ -1055,10 +1059,11 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
-  void (*NV21ToRGB565Row)(const uint8* y_buf,
+  void (*NV12ToRGB565Row)(const uint8* y_buf,
                          const uint8* src_vu,
                          uint8* rgb_buf,
-                          int width) = NV21ToRGB565Row_C;
+                          struct YuvConstants* yuvconstants,
+                          int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_vu || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
@ -1069,33 +1074,33 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
-    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
+    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
+      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
 #endif
-#if defined(HAS_NV21TORGB565ROW_AVX2)
+#if defined(HAS_NV12TORGB565ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
+    NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
+      NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
    }
  }
 #endif
-#if defined(HAS_NV21TORGB565ROW_NEON)
+#if defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
+    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
+      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
 #endif

  for (y = 0; y < height; ++y) {
-    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
+    NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
@ -1110,7 +1115,7 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
              int width, int height,
              uint32 value) {
  int y;
-  void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C;
+  void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
@ -1186,7 +1191,7 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
             int width, int height,
             uint32 value) {
  int y;
-  void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C;
+  void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
  if (!dst_argb ||
      width <= 0 || height == 0 ||
      dst_x < 0 || dst_y < 0) {
@ -1909,7 +1914,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
                const uint8* shuffler, int width, int height) {
  int y;
  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
-                         const uint8* shuffler, int pix) = ARGBShuffleRow_C;
+                         const uint8* shuffler, int width) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@ -1976,7 +1981,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
                                         const uint8* src_sobely,
                                         uint8* dst, int width)) {
  int y;
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
+  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
      ARGBToYJRow_C;
  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
                    uint8* dst_sobely, int width) = SobelYRow_C;
@ -2360,8 +2365,8 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
               int width, int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
-  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUVRow_C;
+  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
@ -2464,8 +2469,8 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
               int width, int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
-  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUVRow_C;
+  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
--- a/source/row_any.cc
+++ b/source/row_any.cc
@ -40,103 +40,9 @@ extern "C" {
      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                      \
             SS(r, DUVSHIFT) * BPP);                                           \
    }
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
-ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7)
-ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7)
-#endif
-#ifdef HAS_I444TOARGBROW_SSSE3
-ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
-ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
-ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
-ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
+#ifdef HAS_I422TOYUY2ROW_SSE2
 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
 ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
-#endif  // HAS_I444TOARGBROW_SSSE3
-#ifdef HAS_I444TOABGRROW_SSSE3
-ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
-#endif
-#ifdef HAS_I422TORGB24ROW_AVX2
-ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
-#endif
-#ifdef HAS_I422TORAWROW_AVX2
-ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
-#endif
-#ifdef HAS_J422TOARGBROW_AVX2
-ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_J422TOABGRROW_AVX2
-ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_H422TOARGBROW_AVX2
-ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_H422TOABGRROW_AVX2
-ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOARGBROW_AVX2
-ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOBGRAROW_AVX2
-ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TORGBAROW_AVX2
-ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOABGRROW_AVX2
-ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I444TOARGBROW_AVX2
-ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
-#endif
-#ifdef HAS_I444TOABGRROW_AVX2
-ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
-#endif
-#ifdef HAS_I411TOARGBROW_AVX2
-ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOARGB4444ROW_AVX2
-ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOARGB1555ROW_AVX2
-ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TORGB565ROW_AVX2
-ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOARGBROW_NEON
-ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
-ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
-ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
-ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
-ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
-ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
-ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
-ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
-ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
-ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
-ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
-#endif
-#ifdef HAS_J422TOARGBROW_NEON
-ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7)
-#endif
-#ifdef HAS_J422TOABGRROW_NEON
-ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7)
-#endif
-#ifdef HAS_H422TOARGBROW_NEON
-ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7)
-#endif
-#ifdef HAS_H422TOABGRROW_NEON
-ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7)
 #endif
 #ifdef HAS_I422TOYUY2ROW_NEON
 ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
@ -144,7 +50,97 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
 #ifdef HAS_I422TOUYVYROW_NEON
 ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
 #endif
-#undef ANY31
+#undef ANY31
+
+// Any 3 planes to 1 with yuvconstants: SIMD on width & ~MASK, rest via temp.
+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                \
+    void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \
+                 uint8* dst_ptr, struct YuvConstants* yuvconstants,            \
+                 int width) {                                                  \
+      SIMD_ALIGNED(uint8 temp[64 * 4]);                                        \
+      memset(temp, 0, 64 * 3);  /* for YUY2 and msan */                        \
+      int r = width & MASK;                                                    \
+      int n = width & ~MASK;                                                   \
+      if (n > 0) {                                                             \
+        ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);               \
+      }                                                                        \
+      memcpy(temp, y_buf + n, r);                                              \
+      memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
+      memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \
+      ANY_SIMD(temp, temp + 64, temp + 128, temp + 192,                        \
+               yuvconstants, MASK + 1);                                        \
+      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                      \
+             SS(r, DUVSHIFT) * BPP);                                           \
+    }
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I444TOARGBROW_SSSE3
+ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
+ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
+ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
+ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
+#endif  // HAS_I444TOARGBROW_SSSE3
+#ifdef HAS_I444TOABGRROW_SSSE3
+ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
+#endif
+#ifdef HAS_I422TORGB24ROW_AVX2
+ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
+#endif
+#ifdef HAS_I422TORAWROW_AVX2
+ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
+#endif
+#ifdef HAS_I422TOARGBROW_AVX2
+ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOBGRAROW_AVX2
+ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TORGBAROW_AVX2
+ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOABGRROW_AVX2
+ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOARGBROW_AVX2
+ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOABGRROW_AVX2
+ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
+#endif
+#ifdef HAS_I411TOARGBROW_AVX2
+ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOARGB4444ROW_AVX2
+ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TOARGB1555ROW_AVX2
+ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TORGB565ROW_AVX2
+ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TOARGBROW_NEON
+ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
+ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
+ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
+ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
+ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
+ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
+#endif
+#undef ANY31C

 // Any 2 planes to 1.
 #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)              \
@ -164,32 +160,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
    }

-// Biplanar to RGB.
-#ifdef HAS_NV12TOARGBROW_SSSE3
-ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
-ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
-#endif
-#ifdef HAS_NV12TOARGBROW_AVX2
-ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
-ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
-#endif
-#ifdef HAS_NV12TOARGBROW_NEON
-ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
-ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
-#endif
-#ifdef HAS_NV12TORGB565ROW_SSSE3
-ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
-ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
-#endif
-#ifdef HAS_NV12TORGB565ROW_AVX2
-ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
-ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
-#endif
-#ifdef HAS_NV12TORGB565ROW_NEON
-ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
-ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7)
-#endif
-
 // Merge functions.
 #ifdef HAS_MERGEUVROW_SSE2
 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
@ -249,6 +219,46 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
 #endif
 #undef ANY21

+// Any 2 planes to 1 with yuvconstants: SIMD on width & ~MASK, rest via temp.
+#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
+    void NAMEANY(const uint8* y_buf, const uint8* uv_buf,                      \
+                 uint8* dst_ptr, struct YuvConstants* yuvconstants,            \
+                 int width) {                                                  \
+      SIMD_ALIGNED(uint8 temp[64 * 3]);                                        \
+      memset(temp, 0, 64 * 2);  /* for msan */                                 \
+      int r = width & MASK;                                                    \
+      int n = width & ~MASK;                                                   \
+      if (n > 0) {                                                             \
+        ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                     \
+      }                                                                        \
+      memcpy(temp, y_buf + n * SBPP, r * SBPP);                                \
+      memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2,                       \
+             SS(r, UVSHIFT) * SBPP2);                                          \
+      ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1);           \
+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
+    }
+
+// Biplanar to RGB.
+#ifdef HAS_NV12TOARGBROW_SSSE3
+ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TOARGBROW_AVX2
+ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
+#endif
+#ifdef HAS_NV12TOARGBROW_NEON
+ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_SSSE3
+ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_AVX2
+ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
+#endif
+#ifdef HAS_NV12TORGB565ROW_NEON
+ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
+#endif
+#undef ANY21C
+
 // Any 1 to 1.
 #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                     \
    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \
@ -297,9 +307,7 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
 #if defined(HAS_I400TOARGBROW_AVX2)
 ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
 #endif
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
-ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
-ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
 ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
 ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
 ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
@ -315,10 +323,6 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
 #if defined(HAS_ARGB4444TOARGBROW_AVX2)
 ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
 #endif
-#if defined(HAS_YUY2TOARGBROW_AVX2)
-ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
-ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
-#endif
 #if defined(HAS_ARGBTORGB24ROW_NEON)
 ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
 ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
@ -327,8 +331,6 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
 ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
 ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
 ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
-ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
-ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
 #endif
 #ifdef HAS_ARGBTOYROW_AVX2
 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
@ -426,6 +428,35 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
 #endif
 #undef ANY11

+// Any 1 to 1 with yuvconstants: SIMD for full blocks, remainder through temp.
+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                    \
+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr,                         \
+                 struct YuvConstants* yuvconstants, int width) {               \
+      SIMD_ALIGNED(uint8 temp[128 * 2]);  /* [0,128) in, [128,256) out */      \
+      memset(temp, 0, 128);  /* for YUY2 and msan */                           \
+      int r = width & MASK;  /* leftover pixels (< MASK + 1) */                \
+      int n = width & ~MASK;  /* multiple of MASK + 1 pixels */                \
+      if (n > 0) {                                                             \
+        ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n);                           \
+      }                                                                        \
+      memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);    \
+      ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1);                      \
+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
+    }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
+ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
+ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
+ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
+#endif
+#undef ANY11C
+
 // Any 1 to 1 blended.
 #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                    \
    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \
--- a/source/row_common.cc
+++ b/source/row_common.cc
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@ -1350,23 +1350,23 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
    "punpcklwd  %%xmm0,%%xmm0                                   \n"

 // Convert 8 pixels: 8 UV and 8 Y
-#define YUVTORGB(YuvConstants)                                                 \
+#define YUVTORGB(yuvconstants)                                                 \
    "movdqa     %%xmm0,%%xmm1                                   \n"            \
    "movdqa     %%xmm0,%%xmm2                                   \n"            \
    "movdqa     %%xmm0,%%xmm3                                   \n"            \
-    "movdqa     " MEMACCESS2(96, [YuvConstants]) ",%%xmm0       \n"            \
-    "pmaddubsw  " MEMACCESS([YuvConstants]) ",%%xmm1            \n"            \
+    "movdqa     " MEMACCESS2(96, [yuvconstants]) ",%%xmm0       \n"            \
+    "pmaddubsw  " MEMACCESS([yuvconstants]) ",%%xmm1            \n"            \
    "psubw      %%xmm1,%%xmm0                                   \n"            \
-    "movdqa     " MEMACCESS2(128, [YuvConstants]) ",%%xmm1      \n"            \
-    "pmaddubsw  " MEMACCESS2(32, [YuvConstants]) ",%%xmm2       \n"            \
+    "movdqa     " MEMACCESS2(128, [yuvconstants]) ",%%xmm1      \n"            \
+    "pmaddubsw  " MEMACCESS2(32, [yuvconstants]) ",%%xmm2       \n"            \
    "psubw      %%xmm2,%%xmm1                                   \n"            \
-    "movdqa     " MEMACCESS2(160, [YuvConstants]) ",%%xmm2      \n"            \
-    "pmaddubsw  " MEMACCESS2(64, [YuvConstants]) ",%%xmm3       \n"            \
+    "movdqa     " MEMACCESS2(160, [yuvconstants]) ",%%xmm2      \n"            \
+    "pmaddubsw  " MEMACCESS2(64, [yuvconstants]) ",%%xmm3       \n"            \
    "psubw      %%xmm3,%%xmm2                                   \n"            \
    "movq       " MEMACCESS([y_buf]) ",%%xmm3                   \n"            \
    "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n"            \
    "punpcklbw  %%xmm3,%%xmm3                                   \n"            \
-    "pmulhuw    " MEMACCESS2(192, [YuvConstants]) ",%%xmm3      \n"            \
+    "pmulhuw    " MEMACCESS2(192, [yuvconstants]) ",%%xmm3      \n"            \
    "paddsw     %%xmm3,%%xmm0                                   \n"            \
    "paddsw     %%xmm3,%%xmm1                                   \n"            \
    "paddsw     %%xmm3,%%xmm2                                   \n"            \
@ -1423,19 +1423,19 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
    "movdqu    %%xmm0," MEMACCESS2(0x10, [dst_rgba]) "           \n"           \
    "lea       " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba]          \n"

-void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
-                                      const uint8* u_buf,
-                                      const uint8* v_buf,
-                                      uint8* dst_argb,
-                                      struct YuvConstants* YuvConstants,
-                                      int width) {
+void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_argb,
+                                struct YuvConstants* yuvconstants,
+                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
    "pcmpeqb   %%xmm5,%%xmm5                   \n"
    LABELALIGN
  "1:                                          \n"
    READYUV444
-    YUVTORGB(YuvConstants)
+    YUVTORGB(yuvconstants)
    STOREARGB
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1444,25 +1444,25 @@ void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [YuvConstants]"r"(YuvConstants)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
 }

-void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
-                                      const uint8* u_buf,
-                                      const uint8* v_buf,
-                                      uint8* dst_abgr,
-                                      struct YuvConstants* YuvConstants,
-                                      int width) {
+void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_abgr,
+                                struct YuvConstants* yuvconstants,
+                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
    "pcmpeqb   %%xmm5,%%xmm5                   \n"
    LABELALIGN
  "1:                                          \n"
    READYUV444
-    YUVTORGB(YuvConstants)
+    YUVTORGB(yuvconstants)
    STOREABGR
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1471,7 +1471,7 @@ void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
    [width]"+rm"(width)    // %[width]
-  : [YuvConstants]"r"(YuvConstants)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1482,6 +1482,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* dst_rgb24,
+                                 struct YuvConstants* yuvconstants,
                                 int width) {
  asm volatile (
    "movdqa    %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
@ -1490,7 +1491,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    "punpcklbw %%xmm1,%%xmm0                   \n"
    "punpcklbw %%xmm2,%%xmm2                   \n"
    "movdqa    %%xmm0,%%xmm1                   \n"
@ -1514,7 +1515,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
 #else
    [width]"+rm"(width)    // %[width]
 #endif
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB),
+  : [yuvconstants]"r"(yuvconstants),  // %[yuvconstants]
    [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
    [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
  : "memory", "cc", NACL_R14
@ -1526,6 +1527,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* dst_raw,
+                               struct YuvConstants* yuvconstants,
                               int width) {
  asm volatile (
    "movdqa    %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
@ -1534,7 +1536,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    "punpcklbw %%xmm1,%%xmm0                   \n"
    "punpcklbw %%xmm2,%%xmm2                   \n"
    "movdqa    %%xmm0,%%xmm1                   \n"
@ -1558,7 +1560,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
 #else
    [width]"+rm"(width)    // %[width]
 #endif
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB),
+  : [yuvconstants]"r"(yuvconstants),  // %[yuvconstants]
    [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
    [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
  : "memory", "cc", NACL_R14
@ -1566,19 +1568,19 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
  );
 }

-void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
-                                      const uint8* u_buf,
-                                      const uint8* v_buf,
-                                      uint8* dst_argb,
-                                      struct YuvConstants* YuvConstants,
-                                      int width) {
+void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_argb,
+                                struct YuvConstants* yuvconstants,
+                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
    "pcmpeqb   %%xmm5,%%xmm5                   \n"
    LABELALIGN
  "1:                                          \n"
    READYUV422
-    YUVTORGB(YuvConstants)
+    YUVTORGB(yuvconstants)
    STOREARGB
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1587,7 +1589,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [YuvConstants]"r"(YuvConstants)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1597,6 +1599,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* dst_argb,
+                                struct YuvConstants* yuvconstants,
                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
@ -1604,7 +1607,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV411
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    STOREARGB
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1613,7 +1616,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1622,13 +1625,14 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
                                const uint8* uv_buf,
                                uint8* dst_argb,
+                                struct YuvConstants* yuvconstants,
                                int width) {
  asm volatile (
    "pcmpeqb   %%xmm5,%%xmm5                   \n"
    LABELALIGN
  "1:                                          \n"
    READNV12
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    STOREARGB
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1636,30 +1640,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  // Does not use r14.
-  : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-  );
-}
-
-void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
-                                const uint8* uv_buf,
-                                uint8* dst_argb,
-                                int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READNV12
-    YUVTORGB(kYuvConstants)
-    STOREARGB
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  // Does not use r14.
  : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1669,6 +1650,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* dst_bgra,
+                                struct YuvConstants* yuvconstants,
                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
@ -1676,7 +1658,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    STOREBGRA
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1685,25 +1667,25 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_bgra]"+r"(dst_bgra),  // %[dst_bgra]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
 }

-void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
-                                      const uint8* u_buf,
-                                      const uint8* v_buf,
-                                      uint8* dst_abgr,
-                                      struct YuvConstants* YuvConstants,
-                                      int width) {
+void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_abgr,
+                                struct YuvConstants* yuvconstants,
+                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
    "pcmpeqb   %%xmm5,%%xmm5                   \n"
    LABELALIGN
  "1:                                          \n"
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    STOREABGR
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1712,7 +1694,7 @@ void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(YuvConstants)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1722,6 +1704,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* dst_rgba,
+                                struct YuvConstants* yuvconstants,
                                int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
@ -1729,7 +1712,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(yuvconstants)
    STORERGBA
    "sub       $0x8,%[width]                   \n"
    "jg        1b                              \n"
@ -1738,7 +1721,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_rgba]"+r"(dst_rgba),  // %[dst_rgba]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1788,6 +1771,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* dst_bgra,
+                               struct YuvConstants* yuvconstants,
                               int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
@ -1795,7 +1779,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV422_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(yuvconstants)

    // Step 3: Weave into BGRA
    "vpunpcklbw %%ymm0,%%ymm1,%%ymm1           \n"  // GB
@ -1816,29 +1800,29 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_bgra]"+r"(dst_bgra),  // %[dst_bgra]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
 }
 #endif  // HAS_I422TOBGRAROW_AVX2

-#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
-                                     const uint8* u_buf,
-                                     const uint8* v_buf,
-                                     uint8* dst_argb,
-                                     struct YuvConstants* YuvConstants,
-                                     int width) {
+void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* dst_argb,
+                               struct YuvConstants* yuvconstants,
+                               int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
    "vpcmpeqb   %%ymm5,%%ymm5,%%ymm5           \n"
    LABELALIGN
  "1:                                          \n"
    READYUV422_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(yuvconstants)

    // Step 3: Weave into ARGB
    "vpunpcklbw %%ymm1,%%ymm0,%%ymm0           \n"  // BG
@ -1859,29 +1843,29 @@ void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(YuvConstants)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
 }
-#endif  // HAS_I422TOARGBMATRIXROW_AVX2
+#endif  // HAS_I422TOARGBROW_AVX2

 #if defined(HAS_I422TOABGRROW_AVX2)
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
-void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
-                                     const uint8* u_buf,
-                                     const uint8* v_buf,
-                                     uint8* dst_argb,
-                                     struct YuvConstants* YuvConstants,
-                                     int width) {
+void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* dst_argb,
+                               struct YuvConstants* yuvconstants,
+                               int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
    "vpcmpeqb   %%ymm5,%%ymm5,%%ymm5           \n"
    LABELALIGN
  "1:                                          \n"
    READYUV422_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(yuvconstants)

    // Step 3: Weave into ABGR
    "vpunpcklbw %%ymm1,%%ymm2,%%ymm1           \n"  // RG
@ -1901,7 +1885,7 @@ void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(YuvConstants)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
@ -1915,6 +1899,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* dst_argb,
+                               struct YuvConstants* yuvconstants,
                               int width) {
  asm volatile (
    "sub       %[u_buf],%[v_buf]               \n"
@ -1922,7 +1907,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
    LABELALIGN
  "1:                                          \n"
    READYUV422_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(yuvconstants)

    // Step 3: Weave into RGBA
    "vpunpcklbw %%ymm2,%%ymm1,%%ymm1           \n"
@ -1942,7 +1927,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)  // %[kYuvConstants]
+  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", NACL_R14
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
  );
--- a/source/row_mips.cc
+++ b/source/row_mips.cc
@ -593,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
 // t8 = | 0 | G1 | 0 | g1 |
 // t2 = | 0 | R0 | 0 | r0 |
 // t1 = | 0 | R1 | 0 | r1 |
-#define I422ToTransientMipsRGB                                                 \
+#define YUVTORGB                                                               \
      "lw                $t0, 0(%[y_buf])       \n"                            \
      "lhu               $t1, 0(%[u_buf])       \n"                            \
      "lhu               $t2, 0(%[v_buf])       \n"                            \
@ -652,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
      "addu.ph           $t2, $t2, $s5          \n"                            \
      "addu.ph           $t1, $t1, $s5          \n"

+// TODO(fbarchard): honor yuvconstants; the asm below appears to ignore it.
 void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
+                              struct YuvConstants* yuvconstants,
                              int width) {
  __asm__ __volatile__ (
    ".set push                                \n"
@ -671,7 +673,7 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|ff|

   "1:                                        \n"
-      I422ToTransientMipsRGB
+      YUVTORGB
 // Arranging into argb format
    "precr.qb.ph       $t4, $t8, $t4          \n"  // |G1|g1|B1|b1|
    "precr.qb.ph       $t5, $t9, $t5          \n"  // |G0|g0|B0|b0|
@ -717,6 +719,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
+                              struct YuvConstants* yuvconstants,
                              int width) {
  __asm__ __volatile__ (
    ".set push                                \n"
@ -732,7 +735,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|

   "1:                                         \n"
-      I422ToTransientMipsRGB
+      YUVTORGB
 // Arranging into abgr format
    "precr.qb.ph      $t0, $t8, $t1           \n"  // |G1|g1|R1|r1|
    "precr.qb.ph      $t3, $t9, $t2           \n"  // |G0|g0|R0|r0|
@ -778,6 +781,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
+                              struct YuvConstants* yuvconstants,
                              int width) {
  __asm__ __volatile__ (
    ".set push                                \n"
@ -793,7 +797,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
    "ori               $s6, 0xff              \n"  // |00|ff|00|ff|

   "1:                                        \n"
-      I422ToTransientMipsRGB
+      YUVTORGB
      // Arranging into bgra format
    "precr.qb.ph       $t4, $t4, $t8          \n"  // |B1|b1|G1|g1|
    "precr.qb.ph       $t5, $t5, $t9          \n"  // |B0|b0|G0|g0|
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@ -93,7 +93,7 @@ extern "C" {
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"

-#define YUV422TORGB_SETUP_REG                                                  \
+#define YUVTORGB_SETUP                                                         \
    MEMACCESS([kUVToRB])                                                       \
    "vld1.8     {d24}, [%[kUVToRB]]            \n"                             \
    MEMACCESS([kUVToG])                                                        \
@ -107,7 +107,7 @@ extern "C" {
    MEMACCESS([kYToRgb])                                                       \
    "vld1.32    {d30[], d31[]}, [%[kYToRgb]]     \n"

-#define YUV422TORGB                                                            \
+#define YUVTORGB                                                               \
    "vmull.u8   q8, d2, d24                    \n" /* u/v B/R component      */\
    "vmull.u8   q9, d2, d25                    \n" /* u/v G component        */\
    "vmovl.u8   q0, d0                         \n" /* Y                      */\
@ -138,12 +138,13 @@ void I444ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV444
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(3)
@ -154,26 +155,26 @@ void I444ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
 }

-void I422ToARGBMatrixRow_NEON(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              struct YuvConstantsNEON* YuvConstants,
-                              int width) {
+void I422ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
+                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(3)
@ -184,10 +185,10 @@ void I422ToARGBMatrixRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
-    : [kUVToRB]"r"(&YuvConstants->kUVToRB),   // %5
-      [kUVToG]"r"(&YuvConstants->kUVToG),     // %6
-      [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
-      [kYToRgb]"r"(&YuvConstants->kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -197,12 +198,13 @@ void I411ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV411
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(3)
@ -213,10 +215,10 @@ void I411ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -226,12 +228,13 @@ void I422ToBGRARow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_bgra,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vswp.u8    d20, d22                       \n"
    "vmov.u8    d19, #255                      \n"
@ -243,26 +246,26 @@ void I422ToBGRARow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_bgra),  // %3
      "+r"(width)      // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
 }

-void I422ToABGRMatrixRow_NEON(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_abgr,
-                              struct YuvConstantsNEON* YuvConstants,
-                              int width) {
+void I422ToABGRRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_abgr,
+                        struct YuvConstants* yuvconstants,
+                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vswp.u8    d20, d22                       \n"
    "vmov.u8    d23, #255                      \n"
@ -274,10 +277,10 @@ void I422ToABGRMatrixRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_abgr),  // %3
      "+r"(width)      // %4
-    : [kUVToRB]"r"(&YuvConstants->kUVToRB),   // %5
-      [kUVToG]"r"(&YuvConstants->kUVToG),     // %6
-      [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
-      [kYToRgb]"r"(&YuvConstants->kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -287,12 +290,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_rgba,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vmov.u8    d19, #255                      \n"
    MEMACCESS(3)
@ -303,10 +307,10 @@ void I422ToRGBARow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_rgba),  // %3
      "+r"(width)      // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -316,12 +320,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
+                         struct YuvConstants* yuvconstants,
                         int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    MEMACCESS(3)
    "vst3.8     {d20, d21, d22}, [%3]!         \n"
@ -331,10 +336,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
      "+r"(src_v),      // %2
      "+r"(dst_rgb24),  // %3
      "+r"(width)       // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -344,12 +349,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_raw,
+                       struct YuvConstants* yuvconstants,
                       int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vswp.u8    d20, d22                       \n"
    MEMACCESS(3)
@ -360,10 +366,10 @@ void I422ToRAWRow_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_raw),  // %3
      "+r"(width)     // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -385,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
+                          struct YuvConstants* yuvconstants,
                          int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    ARGBTORGB565
    MEMACCESS(3)
@ -401,10 +408,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_rgb565),  // %3
      "+r"(width)     // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -429,12 +436,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
+                            struct YuvConstants* yuvconstants,
                            int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    ARGBTOARGB1555
@ -446,10 +454,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_argb1555),  // %3
      "+r"(width)     // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -468,13 +476,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
+                            struct YuvConstants* yuvconstants,
                            int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
  "1:                                          \n"
    READYUV422
-    YUV422TORGB
+    YUVTORGB
    "subs       %4, %4, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    ARGBTOARGB4444
@ -486,10 +495,10 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_argb4444),  // %3
      "+r"(width)     // %4
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %5
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %6
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -499,10 +508,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV400
-    YUV422TORGB
+    YUVTORGB
    "subs       %2, %2, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(1)
@ -511,10 +520,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
    : "+r"(src_y),     // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %3
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %4
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&kYuvConstants.kUVToRB),
+      [kUVToG]"r"(&kYuvConstants.kUVToG),
+      [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -545,12 +554,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
 void NV12ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_uv,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READNV12
-    YUV422TORGB
+    YUVTORGB
    "subs       %3, %3, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(2)
@ -560,37 +570,10 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_uv),    // %1
      "+r"(dst_argb),  // %2
      "+r"(width)      // %3
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %4
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %5
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
-    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void NV21ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    YUV422TORGB_SETUP_REG
-  "1:                                          \n"
-    READNV21
-    YUV422TORGB
-    "subs       %3, %3, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    MEMACCESS(2)
-    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_argb),  // %2
-      "+r"(width)      // %3
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %4
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %5
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -599,12 +582,13 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
 void NV12ToRGB565Row_NEON(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
+                          struct YuvConstants* yuvconstants,
                          int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READNV12
-    YUV422TORGB
+    YUVTORGB
    "subs       %3, %3, #8                     \n"
    ARGBTORGB565
    MEMACCESS(2)
@ -614,37 +598,10 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
      "+r"(src_uv),    // %1
      "+r"(dst_rgb565),  // %2
      "+r"(width)      // %3
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %4
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %5
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
-    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void NV21ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_uv,
-                          uint8* dst_rgb565,
-                          int width) {
-  asm volatile (
-    YUV422TORGB_SETUP_REG
-  "1:                                          \n"
-    READNV21
-    YUV422TORGB
-    "subs       %3, %3, #8                     \n"
-    ARGBTORGB565
-    MEMACCESS(2)
-    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_rgb565),  // %2
-      "+r"(width)      // %3
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %4
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %5
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -652,12 +609,13 @@ void NV21ToRGB565Row_NEON(const uint8* src_y,

 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUY2
-    YUV422TORGB
+    YUVTORGB
    "subs       %2, %2, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(1)
@ -666,10 +624,10 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    : "+r"(src_yuy2),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %3
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %4
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
@ -677,12 +635,13 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,

 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READUYVY
-    YUV422TORGB
+    YUVTORGB
    "subs       %2, %2, #8                     \n"
    "vmov.u8    d23, #255                      \n"
    MEMACCESS(1)
@ -691,10 +650,10 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    : "+r"(src_uyvy),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
-    : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB),   // %3
-      [kUVToG]"r"(&kYuvConstantsNEON.kUVToG),     // %4
-      [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+      [kUVToG]"r"(&yuvconstants->kUVToG),
+      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@ -91,7 +91,8 @@ extern "C" {
    "uzp2       v3.8b, v2.8b, v2.8b            \n"                             \
    "ins        v1.s[1], v3.s[0]               \n"

-#define YUV422TORGB_SETUP_REG                                                  \
+// TODO(fbarchard): replace movi with constants from struct.
+#define YUVTORGB_SETUP                                                         \
    "ld1r       {v24.8h}, [%[kUVBiasBGR]], #2  \n"                             \
    "ld1r       {v25.8h}, [%[kUVBiasBGR]], #2  \n"                             \
    "ld1r       {v26.8h}, [%[kUVBiasBGR]]      \n"                             \
@ -101,7 +102,7 @@ extern "C" {
    "movi       v29.8h, #25                    \n"                             \
    "movi       v30.8h, #52                    \n"

-#define YUV422TORGB(vR, vG, vB)                                                \
+#define YUVTORGB(vR, vG, vB)                                                   \
    "uxtl       v0.8h, v0.8b                   \n" /* Extract Y    */          \
    "shll       v2.8h, v1.8b, #8               \n" /* Replicate UV */          \
    "ushll2     v3.4s, v0.8h, #0               \n" /* Y */                     \
@ -143,12 +144,13 @@ void I444ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV444
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    "movi       v23.8b, #255                   \n" /* A */
    MEMACCESS(3)
@ -159,8 +161,8 @@ void I444ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+      [kYToRgb]"r"(&yuvconstants->kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -173,12 +175,13 @@ void I422ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    "movi       v23.8b, #255                   \n" /* A */
    MEMACCESS(3)
@ -189,8 +192,8 @@ void I422ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -202,12 +205,13 @@ void I411ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV411
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    "movi       v23.8b, #255                   \n" /* A */
    MEMACCESS(3)
@ -218,8 +222,8 @@ void I411ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -231,12 +235,13 @@ void I422ToBGRARow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_bgra,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v21, v22, v23)
+    YUVTORGB(v21, v22, v23)
    "subs       %w4, %w4, #8                   \n"
    "movi       v20.8b, #255                   \n" /* A */
    MEMACCESS(3)
@ -247,8 +252,8 @@ void I422ToBGRARow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_bgra),  // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -261,12 +266,13 @@ void I422ToABGRRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_abgr,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v20, v21, v22)
+    YUVTORGB(v20, v21, v22)
    "subs       %w4, %w4, #8                   \n"
    "movi       v23.8b, #255                   \n" /* A */
    MEMACCESS(3)
@ -277,8 +283,8 @@ void I422ToABGRRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_abgr),  // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -290,12 +296,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_rgba,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v23, v22, v21)
+    YUVTORGB(v23, v22, v21)
    "subs       %w4, %w4, #8                   \n"
    "movi       v20.8b, #255                   \n" /* A */
    MEMACCESS(3)
@ -306,8 +313,8 @@ void I422ToRGBARow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_rgba),  // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -319,12 +326,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
+                         struct YuvConstants* yuvconstants,
                         int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    MEMACCESS(3)
    "st3        {v20.8b,v21.8b,v22.8b}, [%3], #24     \n"
@ -334,8 +342,8 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_rgb24), // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -347,12 +355,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_raw,
+                       struct YuvConstants* yuvconstants,
                       int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v20, v21, v22)
+    YUVTORGB(v20, v21, v22)
    "subs       %w4, %w4, #8                   \n"
    MEMACCESS(3)
    "st3        {v20.8b,v21.8b,v22.8b}, [%3], #24     \n"
@ -362,8 +371,8 @@ void I422ToRAWRow_NEON(const uint8* src_y,
      "+r"(src_v),     // %2
      "+r"(dst_raw),   // %3
      "+r"(width)      // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -382,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
+                          struct YuvConstants* yuvconstants,
                          int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    ARGBTORGB565
    MEMACCESS(3)
@ -398,8 +408,8 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_rgb565),  // %3
      "+r"(width)     // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -420,12 +430,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
+                            struct YuvConstants* yuvconstants,
                            int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    "movi       v23.8b, #255                   \n"
    ARGBTOARGB1555
@ -437,8 +448,8 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_argb1555),  // %3
      "+r"(width)     // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -460,13 +471,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
+                            struct YuvConstants* yuvconstants,
                            int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
    "movi       v4.16b, #0x0f                  \n"  // bits to clear with vbic.
  "1:                                          \n"
    READYUV422
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w4, %w4, #8                   \n"
    "movi       v23.8b, #255                   \n"
    ARGBTOARGB4444
@ -478,8 +490,8 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
      "+r"(src_v),    // %2
      "+r"(dst_argb4444),  // %3
      "+r"(width)     // %4
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -492,10 +504,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
                        int width) {
  int64 width64 = (int64)(width);
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUV400
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w2, %w2, #8                   \n"
    "movi       v23.8b, #255                   \n"
    MEMACCESS(1)
@ -504,8 +516,8 @@ void I400ToARGBRow_NEON(const uint8* src_y,
    : "+r"(src_y),     // %0
      "+r"(dst_argb),  // %1
      "+r"(width64)    // %2
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -540,12 +552,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
 void NV12ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_uv,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READNV12
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w3, %w3, #8                   \n"
    "movi       v23.8b, #255                   \n"
    MEMACCESS(2)
@ -555,51 +568,25 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
      "+r"(src_uv),    // %1
      "+r"(dst_argb),  // %2
      "+r"(width)      // %3
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
 }
 #endif  // HAS_NV12TOARGBROW_NEON

-#ifdef HAS_NV21TOARGBROW_NEON
-void NV21ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    YUV422TORGB_SETUP_REG
-  "1:                                          \n"
-    READNV21
-    YUV422TORGB(v22, v21, v20)
-    "subs       %w3, %w3, #8                   \n"
-    "movi       v23.8b, #255                   \n"
-    MEMACCESS(2)
-    "st4        {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32     \n"
-    "b.gt       1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_argb),  // %2
-      "+r"(width)      // %3
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
-    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
-      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
-  );
-}
-#endif  // HAS_NV21TOARGBROW_NEON
-
 #ifdef HAS_NV12TORGB565ROW_NEON
 void NV12ToRGB565Row_NEON(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
+                          struct YuvConstants* yuvconstants,
                          int width) {
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READNV12
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w3, %w3, #8                   \n"
    ARGBTORGB565
    MEMACCESS(2)
@ -609,51 +596,25 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
      "+r"(src_uv),    // %1
      "+r"(dst_rgb565),  // %2
      "+r"(width)      // %3
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
 }
 #endif  // HAS_NV12TORGB565ROW_NEON

-#ifdef HAS_NV21TORGB565ROW_NEON
-void NV21ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_uv,
-                          uint8* dst_rgb565,
-                          int width) {
-  asm volatile (
-    YUV422TORGB_SETUP_REG
-  "1:                                          \n"
-    READNV21
-    YUV422TORGB(v22, v21, v20)
-    "subs       %w3, %w3, #8                   \n"
-    ARGBTORGB565
-    MEMACCESS(2)
-    "st1        {v0.8h}, [%2], 16              \n"  // store 8 pixels RGB565.
-    "b.gt       1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_rgb565),  // %2
-      "+r"(width)      // %3
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
-    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
-      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
-  );
-}
-#endif  // HAS_NV21TORGB565ROW_NEON
-
 #ifdef HAS_YUY2TOARGBROW_NEON
 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  int64 width64 = (int64)(width);
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READYUY2
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w2, %w2, #8                   \n"
    "movi       v23.8b, #255                   \n"
    MEMACCESS(1)
@ -662,8 +623,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    : "+r"(src_yuy2),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width64)    // %2
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
@ -673,13 +634,14 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
 #ifdef HAS_UYVYTOARGBROW_NEON
 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  int64 width64 = (int64)(width);
  asm volatile (
-    YUV422TORGB_SETUP_REG
+    YUVTORGB_SETUP
  "1:                                          \n"
    READUYVY
-    YUV422TORGB(v22, v21, v20)
+    YUVTORGB(v22, v21, v20)
    "subs       %w2, %w2, #8                   \n"
    "movi       v23.8b, #255                   \n"
    MEMACCESS(1)
@ -688,8 +650,8 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    : "+r"(src_uyvy),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width64)    // %2
-    : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
-      [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+    : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+      [kYToRgb]"r"(&kYuvConstants.kYToRgb)
    : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
      "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
--- a/source/row_win.cc
+++ b/source/row_win.cc
@ -83,13 +83,13 @@ extern "C" {
    dst_argb += 32;


-#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_argb,
-                               struct YuvConstants* YuvConstants,
-                               int width) {
+#if defined(HAS_I422TOARGBROW_SSSE3)
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         struct YuvConstants* yuvconstants,
+                         int width) {
  __m128i xmm0, xmm1, xmm2, xmm3;
  const __m128i xmm5 = _mm_set1_epi8(-1);
  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@ -102,13 +102,13 @@ void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
 }
 #endif

-#if defined(HAS_I422TOABGRMATRIXROW_SSSE3)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_argb,
-                               struct YuvConstants* YuvConstants,
-                               int width) {
+#if defined(HAS_I422TOABGRROW_SSSE3)
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         struct YuvConstants* yuvconstants,
+                         int width) {
  __m128i xmm0, xmm1, xmm2, xmm3;
  const __m128i xmm5 = _mm_set1_epi8(-1);
  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@ -1963,16 +1963,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
    __asm lea        edx,  [edx + 64]                                          \
  }

-#ifdef HAS_I422TOARGBMATRIXROW_AVX2
+#ifdef HAS_I422TOARGBROW_AVX2
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked)
-void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* dst_argb,
-                              struct YuvConstants* YuvConstants,
-                              int width) {
+void I422ToARGBRow_AVX2(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
+                        int width) {
  __asm {
    push       esi
    push       edi
@ -2001,18 +2001,18 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
    ret
  }
 }
-#endif  // HAS_I422TOARGBMATRIXROW_AVX2
+#endif  // HAS_I422TOARGBROW_AVX2

-#ifdef HAS_I444TOARGBMATRIXROW_AVX2
+#ifdef HAS_I444TOARGBROW_AVX2
 // 16 pixels
 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked)
-void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* dst_argb,
-                              struct YuvConstants* YuvConstants,
-                              int width) {
+void I444ToARGBRow_AVX2(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
+                        int width) {
  __asm {
    push       esi
    push       edi
@ -2040,18 +2040,18 @@ void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
    ret
  }
 }
-#endif  // HAS_I444TOARGBMATRIXROW_AVX2
+#endif  // HAS_I444TOARGBROW_AVX2

-#ifdef HAS_I444TOABGRMATRIXROW_AVX2
+#ifdef HAS_I444TOABGRROW_AVX2
 // 16 pixels
 // 16 UV values with 16 Y producing 16 ABGR (64 bytes).
 __declspec(naked)
-void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* dst_abgr,
-                              struct YuvConstants* YuvConstants,
-                              int width) {
+void I444ToABGRRow_AVX2(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_abgr,
+                        struct YuvConstants* yuvconstants,
+                        int width) {
  __asm {
    push       esi
    push       edi
@ -2079,7 +2079,7 @@ void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
    ret
  }
 }
-#endif  // HAS_I444TOABGRMATRIXROW_AVX2
+#endif  // HAS_I444TOABGRROW_AVX2

 #ifdef HAS_I411TOARGBROW_AVX2
 // 16 pixels
@ -2089,26 +2089,30 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // argb
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha

 convertloop:
    READYUV411_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(ebp)
    STOREARGB_AVX2

    sub        ecx, 16
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    vzeroupper
@ -2124,23 +2128,27 @@ __declspec(naked)
 void NV12ToARGBRow_AVX2(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  __asm {
    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // UV
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
+    push       ebp
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // UV
+    mov        edx, [esp + 8 + 12]  // argb
+    mov        ebp, [esp + 8 + 16]  // YuvConstants
+    mov        ecx, [esp + 8 + 20]  // width
    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha

 convertloop:
    READNV12_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(ebp)
    STOREARGB_AVX2

    sub        ecx, 16
    jg         convertloop

+    pop        ebp
    pop        esi
    vzeroupper
    ret
@ -2148,37 +2156,6 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
 }
 #endif  // HAS_NV12TOARGBROW_AVX2

-#ifdef HAS_NV21TOARGBROW_AVX2
-// 16 pixels.
-// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)
-void NV21ToARGBRow_AVX2(const uint8* y_buf,
-                        const uint8* uv_buf,
-                        uint8* dst_argb,
-                        int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // UV
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
-    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha
-
- convertloop:
-    READNV12_AVX2
-    YUVTORGB_AVX2(kYvuConstants)
-    STOREARGB_AVX2
-
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_NV21TOARGBROW_AVX2
-
 #ifdef HAS_I422TOBGRAROW_AVX2
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
@ -2188,26 +2165,30 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // argb
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha

 convertloop:
    READYUV422_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(ebp)
    STOREBGRA_AVX2

    sub        ecx, 16
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    vzeroupper
@ -2224,26 +2205,30 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // argb
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha

 convertloop:
    READYUV422_AVX2
-    YUVTORGB_AVX2(kYuvConstants)
+    YUVTORGB_AVX2(ebp)
    STORERGBA_AVX2

    sub        ecx, 16
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    vzeroupper
@ -2256,12 +2241,12 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
 __declspec(naked)
-void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* dst_argb,
-                              struct YuvConstants* YuvConstants,
-                              int width) {
+void I422ToABGRRow_AVX2(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_argb,
+                        struct YuvConstants* yuvconstants,
+                        int width) {
  __asm {
    push       esi
    push       edi
@ -2481,12 +2466,12 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
 // 8 pixels.
 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked)
-void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_argb,
-                               struct YuvConstants* YuvConstants,
-                               int width) {
+void I444ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         struct YuvConstants* yuvconstants,
+                         int width) {
  __asm {
    push       esi
    push       edi
@ -2518,12 +2503,12 @@ void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
 // 8 pixels.
 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
 __declspec(naked)
-void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_abgr,
-                               struct YuvConstants* YuvConstants,
-                               int width) {
+void I444ToABGRRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_abgr,
+                         struct YuvConstants* yuvconstants,
+                         int width) {
  __asm {
    push       esi
    push       edi
@ -2559,27 +2544,31 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
                          uint8* dst_rgb24,
+                          struct YuvConstants* yuvconstants,
                          int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgb24
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // rgb24
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    movdqa     xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
    movdqa     xmm6, xmmword ptr kShuffleMaskARGBToRGB24

 convertloop:
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(ebp)
    STORERGB24

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    ret
@ -2593,27 +2582,31 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_raw,
+                        struct YuvConstants* yuvconstants,
                        int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // raw
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // raw
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    movdqa     xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
    movdqa     xmm6, xmmword ptr kShuffleMaskARGBToRAW

 convertloop:
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(ebp)
    STORERAW

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    ret
@ -2627,15 +2620,18 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
                           const uint8* u_buf,
                           const uint8* v_buf,
                           uint8* rgb565_buf,
+                           struct YuvConstants* yuvconstants,
                           int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgb565
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // rgb565
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    pcmpeqb    xmm5, xmm5       // generate mask 0x0000001f
    psrld      xmm5, 27
@ -2647,12 +2643,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,

 convertloop:
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(ebp)
    STORERGB565

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    ret
@ -2662,12 +2659,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
 // 8 pixels.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_argb,
-                               struct YuvConstants* YuvConstants,
-                               int width) {
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         struct YuvConstants* yuvconstants,
+                         int width) {
  __asm {
    push       esi
    push       edi
@ -2704,30 +2701,32 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* dst_argb,
+                         struct YuvConstants* yuvconstants,
                         int width) {
  __asm {
-    push       ebx
    push       esi
    push       edi
+    push       ebp
    mov        eax, [esp + 12 + 4]   // Y
    mov        esi, [esp + 12 + 8]   // U
    mov        edi, [esp + 12 + 12]  // V
-    mov        edx, [esp + 12 + 16]  // argb
-    mov        ecx, [esp + 12 + 20]  // width
+    mov        edx, [esp + 12 + 16]  // argb
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi
    pcmpeqb    xmm5, xmm5            // generate 0xffffffff for alpha

 convertloop:
-    READYUV411  // modifies EBX
-    YUVTORGB(kYuvConstants)
+    READYUV411
+    YUVTORGB(ebp)
    STOREARGB

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
-    pop        ebx
    ret
  }
 }
@ -2738,51 +2737,27 @@ __declspec(naked)
 void NV12ToARGBRow_SSSE3(const uint8* y_buf,
                         const uint8* uv_buf,
                         uint8* dst_argb,
+                         struct YuvConstants* yuvconstants,
                         int width) {
  __asm {
    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // UV
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
+    push       ebp
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // UV
+    mov        edx, [esp + 8 + 12]  // argb
+    mov        ebp, [esp + 8 + 16]  // YuvConstants
+    mov        ecx, [esp + 8 + 20]  // width
    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha

 convertloop:
    READNV12
-    YUVTORGB(kYuvConstants)
-    STOREARGB
-
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels.
-// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* uv_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // UV
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-
- convertloop:
-    READNV12
-    YUVTORGB(kYvuConstants)
+    YUVTORGB(ebp)
    STOREARGB

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        esi
    ret
  }
@ -2793,25 +2768,29 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* dst_bgra,
+                         struct YuvConstants* yuvconstants,
                         int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // bgra
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // bgra
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi

 convertloop:
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(ebp)
    STOREBGRA

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    ret
@ -2819,12 +2798,12 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
 }

 __declspec(naked)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_abgr,
-                               struct YuvConstants* YuvConstants,
-                               int width) {
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_abgr,
+                         struct YuvConstants* yuvconstants,
+                         int width) {
  __asm {
    push       esi
    push       edi
@ -2858,31 +2837,34 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* dst_rgba,
+                         struct YuvConstants* yuvconstants,
                         int width) {
  __asm {
    push       esi
    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgba
-    mov        ecx, [esp + 8 + 20]  // width
+    push       ebp                   // saved here, restored by pop ebp before ret
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // rgba
+    mov        ebp, [esp + 12 + 20]  // YuvConstants
+    mov        ecx, [esp + 12 + 24]  // width
    sub        edi, esi

 convertloop:
    READYUV422
-    YUVTORGB(kYuvConstants)
+    YUVTORGB(ebp)
    STORERGBA

    sub        ecx, 8
    jg         convertloop

+    pop        ebp
    pop        edi
    pop        esi
    ret
  }
 }
-
 #endif  // HAS_I422TOARGBROW_SSSE3

 #ifdef HAS_I400TOARGBROW_SSE2