From 1f129460689f4bbc5c5bee38ea1de3999949e1a0 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 5 Nov 2019 13:13:47 -0800 Subject: [PATCH] Add U444ToABGR, J444ToABGR, H444ToABGR, H444ToARGB and ConvertToARGB support BUG=960620, libyuv:845, b/129864744 Change-Id: I9f80cda3be8e13298c596fac514f65a23a38d3d0 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1900310 Reviewed-by: Dale Curtis Commit-Queue: Frank Barchard --- README.chromium | 2 +- include/libyuv/convert_argb.h | 340 +++-- include/libyuv/version.h | 2 +- include/libyuv/video_common.h | 13 +- source/convert_argb.cc | 405 +++-- source/convert_to_argb.cc | 66 +- source/row_gcc.cc | 121 +- source/row_mmi.cc | 2665 ++++++++++++++++----------------- unit_test/convert_test.cc | 31 +- 9 files changed, 1890 insertions(+), 1755 deletions(-) diff --git a/README.chromium b/README.chromium index 5046019d4..51a3bb6ac 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1739 +Version: 1740 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 9e12d1127..9eaf49f45 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -37,7 +37,6 @@ int ARGBCopy(const uint8_t* src_argb, int width, int height); - // Convert I420 to ARGB. LIBYUV_API int I420ToARGB(const uint8_t* src_y, @@ -142,6 +141,214 @@ int U420ToABGR(const uint8_t* src_y, int width, int height); +// Convert I422 to ARGB. +LIBYUV_API +int I422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I422 to ABGR. 
+LIBYUV_API +int I422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert J422 to ARGB. +LIBYUV_API +int J422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert J422 to ABGR. +LIBYUV_API +int J422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H422 to ARGB. +LIBYUV_API +int H422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H422 to ABGR. +LIBYUV_API +int H422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U422 to ARGB. +LIBYUV_API +int U422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert U422 to ABGR. +LIBYUV_API +int U422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I444 to ARGB. 
+LIBYUV_API +int I444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I444 to ABGR. +LIBYUV_API +int I444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert J444 to ARGB. +LIBYUV_API +int J444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert J444 to ABGR. +LIBYUV_API +int J444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H444 to ARGB. +LIBYUV_API +int H444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H444 to ABGR. +LIBYUV_API +int H444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U444 to ARGB. +LIBYUV_API +int U444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert U444 to ABGR. 
+LIBYUV_API +int U444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, @@ -232,72 +439,6 @@ int U010ToABGR(const uint16_t* src_y, int dst_stride_abgr, int width, int height); - -// Convert I422 to ARGB. -LIBYUV_API -int I422ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert I444 to ARGB. -LIBYUV_API -int I444ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert U444 to ARGB. -LIBYUV_API -int U444ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert J444 to ARGB. -LIBYUV_API -int J444ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert I444 to ABGR. -LIBYUV_API -int I444ToABGR(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_abgr, - int dst_stride_abgr, - int width, - int height); - // Convert I420 with Alpha to preattenuated ARGB. LIBYUV_API int I420AlphaToARGB(const uint8_t* src_y, @@ -477,71 +618,6 @@ int UYVYToARGB(const uint8_t* src_uyvy, int width, int height); -// Convert J422 to ARGB. 
-LIBYUV_API -int J422ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert J422 to ABGR. -LIBYUV_API -int J422ToABGR(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_abgr, - int dst_stride_abgr, - int width, - int height); - -// Convert H422 to ARGB. -LIBYUV_API -int H422ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert U422 to ARGB. -LIBYUV_API -int U422ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height); - -// Convert H422 to ABGR. -LIBYUV_API -int H422ToABGR(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_abgr, - int dst_stride_abgr, - int width, - int height); - // Convert H010 to ARGB. LIBYUV_API int H010ToARGB(const uint16_t* src_y, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 5e26e34a6..0ecd48b8b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1739 +#define LIBYUV_VERSION 1740 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h index 8a8fc96e4..74c581168 100644 --- a/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h @@ -82,15 +82,18 @@ enum FourCC { // 1 Primary Compressed YUV format. 
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - // 11 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. + // 14 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. - FOURCC_J420 = FOURCC('J', '4', '2', '0'), - FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc - FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc - FOURCC_H422 = FOURCC('H', '4', '2', '2'), // unofficial fourcc + FOURCC_J420 = FOURCC('J', '4', '2', '0'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_J422 = FOURCC('J', '4', '2', '2'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_J444 = FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_J400 = FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc + FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc + FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc FOURCC_U420 = FOURCC('U', '4', '2', '0'), // bt.2020, unofficial fourcc FOURCC_U422 = FOURCC('U', '4', '2', '2'), // bt.2020, unofficial fourcc FOURCC_U444 = FOURCC('U', '4', '4', '4'), // bt.2020, unofficial fourcc diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 32c5ae0ea..cfcde0f73 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -443,6 +443,25 @@ int H422ToARGB(const uint8_t* src_y, &kYuvH709Constants, width, height); } +// Convert H422 to ABGR. 
+LIBYUV_API +int H422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuH709Constants, // Use Yvu matrix + width, height); +} + // Convert U422 to ARGB. LIBYUV_API int U422ToARGB(const uint8_t* src_y, @@ -460,9 +479,9 @@ int U422ToARGB(const uint8_t* src_y, &kYuv2020Constants, width, height); } -// Convert H422 to ABGR. +// Convert U422 to ABGR. LIBYUV_API -int H422ToABGR(const uint8_t* src_y, +int U422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, @@ -473,12 +492,240 @@ int H422ToABGR(const uint8_t* src_y, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvu2020Constants, // Use Yvu matrix + width, height); +} + +// Convert I444 to ARGB with matrix +static int I444ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToARGBRow_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + // Coalesce rows. 
+ if (src_stride_y == width && src_stride_u == width && src_stride_v == width && + dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; + } +#if defined(HAS_I444TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I444ToARGBRow = I444ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I444TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I444ToARGBRow = I444ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I444ToARGBRow = I444ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I444TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I444ToARGBRow = I444ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToARGBRow = I444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_MSA; + } + } +#endif +#if defined(HAS_I444TOARGBROW_MMI) + if (TestCpuFlag(kCpuHasMMI)) { + I444ToARGBRow = I444ToARGBRow_Any_MMI; + if (IS_ALIGNED(width, 4)) { + I444ToARGBRow = I444ToARGBRow_MMI; + } + } +#endif + + for (y = 0; y < height; ++y) { + I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + +// Convert I444 to ARGB. +LIBYUV_API +int I444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvI601Constants, width, height); +} + +// Convert I444 to ABGR. 
+LIBYUV_API +int I444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + +// Convert J444 to ARGB. +LIBYUV_API +int J444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvJPEGConstants, width, height); +} + +// Convert J444 to ABGR. +LIBYUV_API +int J444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuJPEGConstants, // Use Yvu matrix + width, height); +} + +// Convert H444 to ARGB. +LIBYUV_API +int H444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvH709Constants, width, height); +} + +// Convert H444 to ABGR. 
+LIBYUV_API +int H444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuH709Constants, // Use Yvu matrix width, height); } +// Convert U444 to ARGB. +LIBYUV_API +int U444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuv2020Constants, width, height); +} + +// Convert U444 to ABGR. +LIBYUV_API +int U444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvu2020Constants, // Use Yvu matrix + width, height); +} + // Convert 10 bit YUV to ARGB with matrix // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. 
@@ -807,160 +1054,6 @@ int U010ToABGR(const uint16_t* src_y, width, height); } -// Convert I444 to ARGB with matrix -static int I444ToARGBMatrix(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - const struct YuvConstants* yuvconstants, - int width, - int height) { - int y; - void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, - const uint8_t* v_buf, uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, int width) = - I444ToARGBRow_C; - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && src_stride_u == width && src_stride_v == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; - } -#if defined(HAS_I444TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I444ToARGBRow = I444ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I444TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I444ToARGBRow = I444ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I444ToARGBRow = I444ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I444TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I444ToARGBRow = I444ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; - } - } -#endif -#if defined(HAS_I444TOARGBROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - I444ToARGBRow = 
I444ToARGBRow_Any_MMI; - if (IS_ALIGNED(width, 4)) { - I444ToARGBRow = I444ToARGBRow_MMI; - } - } -#endif - - for (y = 0; y < height; ++y) { - I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert I444 to ARGB. -LIBYUV_API -int I444ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { - return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, - src_stride_v, dst_argb, dst_stride_argb, - &kYuvI601Constants, width, height); -} - -// Convert U444 to ARGB. -LIBYUV_API -int U444ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { - return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, - src_stride_v, dst_argb, dst_stride_argb, - &kYuv2020Constants, width, height); -} - -// Convert I444 to ABGR. -LIBYUV_API -int I444ToABGR(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_abgr, - int dst_stride_abgr, - int width, - int height) { - return I444ToARGBMatrix(src_y, src_stride_y, src_v, - src_stride_v, // Swap U and V - src_u, src_stride_u, dst_abgr, dst_stride_abgr, - &kYvuI601Constants, // Use Yvu matrix - width, height); -} - -// Convert J444 to ARGB. 
-LIBYUV_API -int J444ToARGB(const uint8_t* src_y, - int src_stride_y, - const uint8_t* src_u, - int src_stride_u, - const uint8_t* src_v, - int src_stride_v, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { - return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, - src_stride_v, dst_argb, dst_stride_argb, - &kYuvJPEGConstants, width, height); -} - // Convert I420 with Alpha to preattenuated ARGB. static int I420AlphaToARGBMatrix(const uint8_t* src_y, int src_stride_y, diff --git a/source/convert_to_argb.cc b/source/convert_to_argb.cc index bf0e05ed9..c08f61013 100644 --- a/source/convert_to_argb.cc +++ b/source/convert_to_argb.cc @@ -32,9 +32,6 @@ extern "C" { // TODO(fbarchard): Add the following: // H010ToARGB // I010ToARGB -// J400ToARGB -// J422ToARGB -// J444ToARGB LIBYUV_API int ConvertToARGB(const uint8_t* sample, @@ -161,6 +158,11 @@ int ConvertToARGB(const uint8_t* sample, r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; + case FOURCC_J400: + src = sample + src_width * crop_y + crop_x; + r = J400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); + break; // Biplanar formats case FOURCC_NV12: @@ -269,6 +271,18 @@ int ConvertToARGB(const uint8_t* sample, break; } + case FOURCC_J422: { + int halfwidth = (src_width + 1) / 2; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u = + sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; + r = J422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); + break; + } + case FOURCC_H422: { int halfwidth = (src_width + 1) / 2; const uint8_t* src_y = sample + src_width * crop_y + crop_x; @@ -281,6 +295,18 @@ int ConvertToARGB(const uint8_t* sample, break; } + case 
FOURCC_U422: { + int halfwidth = (src_width + 1) / 2; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u = + sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; + r = U422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); + break; + } + case FOURCC_I444: case FOURCC_YV24: { const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } @@ -297,6 +323,40 @@ int ConvertToARGB(const uint8_t* sample, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } + + case FOURCC_J444: { + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; + r = J444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); + break; + } + + case FOURCC_H444: { + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; + r = H444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); + break; + } + + case FOURCC_U444: { + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; + r = U444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); + break; + } + #ifdef HAVE_JPEG case FOURCC_MJPG: r = 
MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index fa7b8cb31..3088bb755 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -84,7 +84,7 @@ static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec16 kSub128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, - 0x8080u, 0x8080u, 0x8080u, 0x8080u}; + 0x8080u, 0x8080u, 0x8080u, 0x8080u}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) @@ -1101,8 +1101,10 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { "lea 0x40(%0),%0 \n" \ "phaddw %%xmm0,%%xmm6 \n" \ "phaddw %%xmm2,%%xmm1 \n" \ - "paddw %%" #round ",%%xmm6 \n" \ - "paddw %%" #round ",%%xmm1 \n" \ + "paddw %%" #round \ + ",%%xmm6 \n" \ + "paddw %%" #round \ + ",%%xmm1 \n" \ "psrlw $0x8,%%xmm6 \n" \ "psrlw $0x8,%%xmm1 \n" \ "packuswb %%xmm1,%%xmm6 \n" \ @@ -1111,33 +1113,35 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { "sub $0x10,%2 \n" \ "jg 1b \n" -#define RGBTOY_AVX2(round) \ - "1: \n" \ - "vmovdqu (%0),%%ymm0 \n" \ - "vmovdqu 0x20(%0),%%ymm1 \n" \ - "vmovdqu 0x40(%0),%%ymm2 \n" \ - "vmovdqu 0x60(%0),%%ymm3 \n" \ - "vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \ - "vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \ - "vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \ - "vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \ - "vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \ - "vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \ - "vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \ - "vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \ - "lea 0x80(%0),%0 \n" \ - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \ - "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \ - "vpaddw %%" #round ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \ - "vpaddw %%" #round ",%%ymm2,%%ymm2 \n" \ - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" \ - "vpsrlw $0x8,%%ymm2,%%ymm2 \n" \ - "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \ - "vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. 
*/ \ - "vmovdqu %%ymm0,(%1) \n" \ - "lea 0x20(%1),%1 \n" \ - "sub $0x20,%2 \n" \ - "jg 1b \n" \ +#define RGBTOY_AVX2(round) \ + "1: \n" \ + "vmovdqu (%0),%%ymm0 \n" \ + "vmovdqu 0x20(%0),%%ymm1 \n" \ + "vmovdqu 0x40(%0),%%ymm2 \n" \ + "vmovdqu 0x60(%0),%%ymm3 \n" \ + "vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \ + "vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \ + "vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \ + "vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \ + "vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \ + "vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \ + "vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \ + "vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \ + "lea 0x80(%0),%0 \n" \ + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \ + "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \ + "vpaddw %%" #round \ + ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \ + "vpaddw %%" #round \ + ",%%ymm2,%%ymm2 \n" \ + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" \ + "vpsrlw $0x8,%%ymm2,%%ymm2 \n" \ + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \ + "vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. */ \ + "vmovdqu %%ymm0,(%1) \n" \ + "lea 0x20(%1),%1 \n" \ + "sub $0x20,%2 \n" \ + "jg 1b \n" \ "vzeroupper \n" #ifdef HAS_ARGBTOYROW_SSSE3 @@ -1148,15 +1152,15 @@ void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { "movdqa %4,%%xmm5 \n" "movdqa %5,%%xmm7 \n" - LABELALIGN - RGBTOY(xmm7) + LABELALIGN RGBTOY(xmm7) : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kARGBToY), // %3 "m"(kSub128), // %4 "m"(kAddY16) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTOYROW_SSSE3 @@ -1168,8 +1172,7 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" - LABELALIGN - RGBTOY(xmm5) + LABELALIGN RGBTOY(xmm5) : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1187,8 +1190,7 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* 
dst_y, int width) { "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" - LABELALIGN - RGBTOY(xmm5) + LABELALIGN RGBTOY(xmm5) : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1210,8 +1212,7 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { "vbroadcastf128 %5,%%ymm7 \n" "vmovdqu %6,%%ymm6 \n" - LABELALIGN - RGBTOY_AVX2(ymm7) + LABELALIGN RGBTOY_AVX2(ymm7) : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1219,7 +1220,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { "m"(kSub128), // %4 "m"(kAddY16), // %5 "m"(kPermdARGBToY_AVX) // %6 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTOYROW_AVX2 @@ -1232,8 +1234,7 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) { "vbroadcastf128 %5,%%ymm7 \n" "vmovdqu %6,%%ymm6 \n" - LABELALIGN - RGBTOY_AVX2(ymm7) + LABELALIGN RGBTOY_AVX2(ymm7) : "+r"(src_abgr), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1241,7 +1242,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) { "m"(kSub128), // %4 "m"(kAddY16), // %5 "m"(kPermdARGBToY_AVX) // %6 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ABGRTOYROW_AVX2 @@ -1253,15 +1255,15 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { "vbroadcastf128 %4,%%ymm5 \n" "vmovdqu %5,%%ymm6 \n" - LABELALIGN - RGBTOY_AVX2(ymm5) + LABELALIGN RGBTOY_AVX2(ymm5) : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kARGBToYJ), // %3 "m"(kSub128), // %4 "m"(kPermdARGBToY_AVX) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } 
#endif // HAS_ARGBTOYJROW_AVX2 @@ -1273,9 +1275,8 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) { "vbroadcastf128 %4,%%ymm5 \n" "vmovdqu %5,%%ymm6 \n" - LABELALIGN - RGBTOY_AVX2(ymm5) - "vzeroupper \n" + LABELALIGN RGBTOY_AVX2( + ymm5) "vzeroupper \n" : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1536,7 +1537,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kSub128), // %5 + "m"(kSub128), // %5 "m"(kARGBToVJ), // %6 "m"(kARGBToUJ), // %7 "m"(kShufARGBToUV_AVX) // %8 @@ -1606,7 +1607,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, : "r"((intptr_t)(src_stride_argb)), // %4 "m"(kARGBToVJ), // %5 "m"(kARGBToUJ), // %6 - "m"(kSub128) // %7 + "m"(kSub128) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVJROW_SSSE3 @@ -1675,15 +1676,15 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) { "movdqa %4,%%xmm5 \n" "movdqa %5,%%xmm7 \n" - LABELALIGN - RGBTOY(xmm7) + LABELALIGN RGBTOY(xmm7) : "+r"(src_bgra), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kBGRAToY), // %3 "m"(kSub128), // %4 "m"(kAddY16) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, @@ -1755,15 +1756,15 @@ void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) { "movdqa %4,%%xmm5 \n" "movdqa %5,%%xmm7 \n" - LABELALIGN - RGBTOY(xmm7) + LABELALIGN RGBTOY(xmm7) : "+r"(src_abgr), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kABGRToY), // %3 "m"(kSub128), // %4 "m"(kAddY16) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } void RGBAToYRow_SSSE3(const uint8_t* src_rgba, 
uint8_t* dst_y, int width) { @@ -1772,15 +1773,15 @@ void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) { "movdqa %4,%%xmm5 \n" "movdqa %5,%%xmm7 \n" - LABELALIGN - RGBTOY(xmm7) + LABELALIGN RGBTOY(xmm7) : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kRGBAToY), // %3 "m"(kSub128), // %4 "m"(kAddY16) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, diff --git a/source/row_mmi.cc b/source/row_mmi.cc index d7d34e47f..50cfca726 100644 --- a/source/row_mmi.cc +++ b/source/row_mmi.cc @@ -6040,93 +6040,90 @@ void I444ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; + uint64_t y, u, v; + uint64_t b_vec[2], g_vec[2], r_vec[2]; uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - __asm__ volatile ( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub - "or %[ub], %[ub], %[mask] \n\t"//must sign extension - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t"//sign extension + uint64_t ub, ug, vg, vr, bb, bg, br, yg; + __asm__ volatile( + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" // yg + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" // bb + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" // ub + "or %[ub], %[ub], 
%[mask] \n\t" // must + // sign + // extension + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" // bg + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" // ug + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" // vg + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" // br + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" // vr + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask] \n\t" // sign + // extension - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 + "punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101 + "pmulhuh %[y], %[y], %[yg] \n\t" // y1 - "punpcklbh %[u], %[u], %[zero] \n\t"//u - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" // u + "paddsh %[b_vec0], %[y], %[bb] \n\t" + "pmullh %[b_vec1], %[u], %[ub] \n\t" + "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" + "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t"//v - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" + "punpcklbh %[v], %[v], 
%[zero] \n\t" // v + "paddsh %[g_vec0], %[y], %[bg] \n\t" + "pmullh %[g_vec1], %[u], %[ug] \n\t" // u*ug + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "pmullh %[g_vec1], %[v], %[vg] \n\t" // v*vg + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" + "paddsh %[r_vec0], %[y], %[br] \n\t" + "pmullh %[r_vec1], %[v], %[vr] \n\t" // v*vr + "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" + "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" + "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" // rrrrbbbb + "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" // ffffgggg + "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" + "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" // gbgbgbgb + "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" // frfrfrfr + "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb + "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb + "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" - 
"daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]), + [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]), + [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]), + [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug), + [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg), + [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6), + [five] "f"(0x55), [mask] "f"(mask) + : "memory"); } // Also used for 420 @@ -6136,99 +6133,96 @@ void I422ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; + uint64_t y, u, v; + uint64_t b_vec[2], g_vec[2], r_vec[2]; uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg - "ldc1 
%[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub - "or %[ub], %[ub], %[mask] \n\t"//must sign extension - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t"//sign extension + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" // yg + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" // bb + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" // ub + "or %[ub], %[ub], %[mask] \n\t" // must + // sign + // extension + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" // bg + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" // ug + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" // vg + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" // br + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" // vr + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask] \n\t" // sign + // extension - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 + "punpcklbh %[y], %[y], %[y] 
\n\t" // y*0x0101 + "pmulhuh %[y], %[y], %[yg] \n\t" // y1 - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" + // u3|u2|u1|u0 --> u1|u1|u0|u0 + "punpcklbh %[u], %[u], %[u] \n\t" // u + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec0], %[y], %[bb] \n\t" + "pmullh %[b_vec1], %[u], %[ub] \n\t" + "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" + "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t"//v - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" + // v3|v2|v1|v0 --> v1|v1|v0|v0 + "punpcklbh %[v], %[v], %[v] \n\t" // v + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec0], %[y], %[bg] \n\t" + "pmullh %[g_vec1], %[u], %[ug] \n\t" // u*ug + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "pmullh %[g_vec1], %[v], %[vg] \n\t" // v*vg + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" + "paddsh %[r_vec0], %[y], %[br] \n\t" + "pmullh %[r_vec1], %[v], %[vr] \n\t" // v*vr + "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" + "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb - "punpckhbh %[r_vec0], 
%[r_vec0], %[g_vec0] \n\t"//frfrfrfr - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" + "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" // rrrrbbbb + "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" // ffffgggg + "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" + "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" // gbgbgbgb + "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" // frfrfrfr + "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb + "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb + "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - 
[mask]"f"(mask) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]), + [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]), + [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]), + [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug), + [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg), + [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6), + [five] "f"(0x55), [mask] "f"(mask) + : "memory"); } // 10 bit YUV to ARGB @@ -6238,102 +6232,96 @@ void I210ToARGBRow_MMI(const uint16_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; + uint64_t y, u, v; + uint64_t b_vec[2], g_vec[2], r_vec[2]; uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], 
%[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask] \n\t" - "1: \n\t" - "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t" + "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "psllh %[y], %[y], %[six] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "psllh %[y], %[y], %[six] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "punpcklhw %[u], %[u], %[u] \n\t" - "psrah %[u], %[u], %[two] \n\t" - "punpcklhw %[v], %[v], %[v] \n\t" - "psrah %[v], %[v], %[two] \n\t" - "pminsh %[u], %[u], %[mask1] \n\t" - "pminsh %[v], %[v], %[mask1] \n\t" + "punpcklhw %[u], %[u], %[u] \n\t" + "psrah %[u], %[u], %[two] \n\t" + "punpcklhw %[v], %[v], %[v] \n\t" + "psrah %[v], %[v], %[two] \n\t" + "pminsh %[u], %[u], %[mask1] \n\t" + "pminsh %[v], %[v], %[mask1] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" + "paddsh %[b_vec0], %[y], %[bb] \n\t" + "pmullh %[b_vec1], %[u], %[ub] \n\t" + "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "paddsh %[g_vec0], %[y], %[bg] \n\t" + "pmullh %[g_vec1], %[u], %[ug] \n\t" + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "pmullh %[g_vec1], %[v], 
%[vg] \n\t" + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" + "paddsh %[r_vec0], %[y], %[br] \n\t" + "pmullh %[r_vec1], %[v], %[vr] \n\t" + "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" + "psrah %[b_vec0], %[b_vec0], %[six] \n\t" + "psrah %[g_vec0], %[g_vec0], %[six] \n\t" + "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" + "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" + "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" + "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" + "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" + "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" + "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" + "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" + "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" + "daddiu 
%[v_ptr], %[v_ptr], 0x04 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask), [two]"f"(0x02), - [mask1]"f"(0x00ff00ff00ff00ff) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]), + [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]), + [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]), + [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug), + [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg), + [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6), + [five] "f"(0x55), [mask] "f"(mask), [two] "f"(0x02), + [mask1] "f"(0x00ff00ff00ff00ff) + : "memory"); } void I422AlphaToARGBRow_MMI(const uint8_t* src_y, @@ -6343,102 +6331,96 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { - uint64_t y,u,v,a; - uint64_t b_vec[2],g_vec[2],r_vec[2]; + uint64_t y, u, v, a; + uint64_t b_vec[2], g_vec[2], r_vec[2]; uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) 
\n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t" - "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t" + "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 + "punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101 + "pmulhuh %[y], %[y], %[yg] \n\t" // y1 - //u3|u2|u1|u0 --> u1|u1|u0|u0 
- "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" + // u3|u2|u1|u0 --> u1|u1|u0|u0 + "punpcklbh %[u], %[u], %[u] \n\t" // u + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec0], %[y], %[bb] \n\t" + "pmullh %[b_vec1], %[u], %[ub] \n\t" + "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" + "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" + // v3|v2|v1|v0 --> v1|v1|v0|v0 + "punpcklbh %[v], %[v], %[v] \n\t" + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec0], %[y], %[bg] \n\t" + "pmullh %[g_vec1], %[u], %[ug] \n\t" + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "pmullh %[g_vec1], %[v], %[vg] \n\t" + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" + "paddsh %[r_vec0], %[y], %[br] \n\t" + "pmullh %[r_vec1], %[v], %[vr] \n\t" + "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" + "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[a] \n\t" - "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "gssdlc1 %[g_vec0], 
0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" + "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" // rrrrbbbb + "packushb %[g_vec0], %[g_vec0], %[a] \n\t" + "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t" // aaaagggg + "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" + "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" + "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" + "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" + "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), [a]"=&f"(a), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [a_ptr]"r"(src_a), [zero]"f"(0x00), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [a] "=&f"(a), + [b_vec0] "=&f"(b_vec[0]), [b_vec1] 
"=&f"(b_vec[1]), + [g_vec0] "=&f"(g_vec[0]), [g_vec1] "=&f"(g_vec[1]), + [r_vec0] "=&f"(r_vec[0]), [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), + [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), + [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [a_ptr] "r"(src_a), [zero] "f"(0x00), + [six] "f"(0x6), [five] "f"(0x55), [mask] "f"(mask) + : "memory"); } void I422ToRGB24Row_MMI(const uint8_t* src_y, @@ -6447,113 +6429,105 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; + uint64_t y, u, v; + uint64_t b_vec[2], g_vec[2], r_vec[2]; uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] 
\n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 + "punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101 + "pmulhuh %[y], %[y], %[yg] \n\t" // y1 - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" + // u3|u2|u1|u0 --> u1|u1|u0|u0 + "punpcklbh %[u], %[u], %[u] \n\t" // u + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec0], %[y], %[bb] \n\t" + "pmullh %[b_vec1], %[u], %[ub] \n\t" + "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" + "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" + // v3|v2|v1|v0 --> v1|v1|v0|v0 + "punpcklbh %[v], %[v], %[v] \n\t" + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec0], %[y], %[bg] \n\t" + "pmullh %[g_vec1], %[u], 
%[ug] \n\t" + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "pmullh %[g_vec1], %[v], %[vg] \n\t" + "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" + "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" + "paddsh %[r_vec0], %[y], %[br] \n\t" + "pmullh %[r_vec1], %[v], %[vr] \n\t" + "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" + "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" - "packushb %[g_vec0], %[g_vec0], %[zero] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" + "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" + "packushb %[g_vec0], %[g_vec0], %[zero] \n\t" + "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" + "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" + "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" + "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t" - "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t" - "or %[g_vec0], %[g_vec0], %[r_vec1] \n\t" - "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t" - "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t" - "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t" - "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t" - "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t" - "pextrh %[r_vec1], %[g_vec1], %[one] \n\t" - "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t" - "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t" - "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t" - "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" + "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t" + "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t" + "or 
%[g_vec0], %[g_vec0], %[r_vec1] \n\t" + "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t" + "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t" + "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t" + "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t" + "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t" + "pextrh %[r_vec1], %[g_vec1], %[one] \n\t" + "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t" + "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t" + "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t" + "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" + "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t" + "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask]"f"(mask), - [lmove1]"f"(0x18), [rmove1]"f"(0x8), - [one]"f"(0x1) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]), + [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]), + [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]), + [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug), + 
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg), + [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6), + [mask] "f"(mask), [lmove1] "f"(0x18), [rmove1] "f"(0x8), [one] "f"(0x1) + : "memory"); } void I422ToARGB4444Row_MMI(const uint8_t* src_y, @@ -6564,110 +6538,103 @@ void I422ToARGB4444Row_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask] \n\t" - "1: \n\t" - "gslwlc1 %[y], 
0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 + "punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101 + "pmulhuh %[y], %[y], %[yg] \n\t" // y1 - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + // u3|u2|u1|u0 --> u1|u1|u0|u0 + "punpcklbh %[u], %[u], %[u] \n\t" // u + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + // v3|v2|v1|v0 --> v1|v1|v0|v0 + "punpcklbh %[v], %[v], %[v] \n\t" + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah 
%[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "and %[g_vec], %[g_vec], %[mask1] \n\t" - "psrlw %[g_vec], %[g_vec], %[four] \n\t" - "psrlw %[r_vec], %[g_vec], %[four] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "punpcklbh %[r_vec], %[alpha], %[zero] \n\t" - "and %[g_vec], %[g_vec], %[r_vec] \n\t" + "and %[g_vec], %[g_vec], %[mask1] \n\t" + "psrlw %[g_vec], %[g_vec], %[four] \n\t" + "psrlw %[r_vec], %[g_vec], %[four] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "punpcklbh %[r_vec], %[alpha], %[zero] \n\t" + "and %[g_vec], %[g_vec], %[r_vec] \n\t" - "and %[b_vec], %[b_vec], %[mask1] \n\t" - "psrlw %[b_vec], %[b_vec], %[four] \n\t" - "psrlw %[r_vec], %[b_vec], %[four] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "punpcklbh %[r_vec], %[alpha], %[zero] \n\t" - "and %[b_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[b_vec] \n\t" + "and %[b_vec], %[b_vec], %[mask1] \n\t" + "psrlw %[b_vec], %[b_vec], %[four] \n\t" + "psrlw %[r_vec], %[b_vec], %[four] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "punpcklbh %[r_vec], %[alpha], %[zero] \n\t" + "and %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[b_vec] \n\t" - "gssdlc1 
%[g_vec], 0x07(%[dst_argb4444]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[dst_argb4444]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00), - [four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0), - [alpha]"f"(-1) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [dst_argb4444] "r"(dst_argb4444), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6), + [mask] "f"(0xff00ff00ff00ff00), [four] "f"(0x4), + [mask1] "f"(0xf0f0f0f0f0f0f0f0), [alpha] "f"(-1) + : "memory"); } void I422ToARGB1555Row_MMI(const uint8_t* src_y, @@ -6678,125 +6645,118 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; 
uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] 
\n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + // u3|u2|u1|u0 --> u1|u1|u0|u0 + "punpcklbh %[u], %[u], %[u] \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + // v3|v2|v1|v0 --> v1|v1|v0|v0 + "punpcklbh %[v], %[v], %[v] \n\t" + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklbh %[b_vec], 
%[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "psrlw %[temp], %[g_vec], %[three] \n\t" - "and %[g_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "or %[g_vec], %[g_vec], %[mask3] \n\t" + "psrlw %[temp], %[g_vec], %[three] \n\t" + "and %[g_vec], %[temp], %[mask2] \n\t" + "psrlw %[temp], %[temp], %[eight] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "psrlw %[temp], %[temp], %[eight] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "or %[g_vec], %[g_vec], %[mask3] \n\t" - "psrlw %[temp], %[b_vec], %[three] \n\t" - "and %[b_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "or %[b_vec], %[b_vec], %[mask3] \n\t" + "psrlw %[temp], %[b_vec], %[three] \n\t" + "and %[b_vec], %[temp], %[mask2] \n\t" + "psrlw %[temp], %[temp], %[eight] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "psrlw %[temp], %[temp], %[eight] \n\t" + "and 
%[r_vec], %[temp], %[mask2] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "or %[b_vec], %[b_vec], %[mask3] \n\t" - "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" - "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" - "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" + "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" + "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" + "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [three]"f"(0x3), [mask2]"f"(0x1f0000001f), - [eight]"f"(0x8), [mask3]"f"(0x800000008000), - [lmove5]"f"(0x5) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] 
"=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [dst_argb1555] "r"(dst_argb1555), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6), + [mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3), + [mask2] "f"(0x1f0000001f), [eight] "f"(0x8), + [mask3] "f"(0x800000008000), [lmove5] "f"(0x5) + : "memory"); } void I422ToRGB565Row_MMI(const uint8_t* src_y, @@ -6807,127 +6767,120 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], 
%[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + // u3|u2|u1|u0 --> u1|u1|u0|u0 + "punpcklbh %[u], %[u], %[u] \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + // v3|v2|v1|v0 --> v1|v1|v0|v0 + "punpcklbh %[v], %[v], %[v] \n\t" + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" 
- "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "psrlh %[temp], %[g_vec], %[three] \n\t" - "and %[g_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "psrlh %[temp], %[g_vec], %[three] \n\t" + "and %[g_vec], %[temp], %[mask2] \n\t" + "psrlw %[temp], %[temp], %[seven] \n\t" + "psrlw %[r_vec], %[mask1], %[eight] \n\t" + "and %[r_vec], %[temp], %[r_vec] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "paddb %[r_vec], %[three], %[six] \n\t" + "psrlw %[temp], %[temp], %[r_vec] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "paddb %[temp], %[three], %[eight] \n\t" + "psllw %[r_vec], %[r_vec], %[temp] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "psrlh %[temp], %[b_vec], %[three] \n\t" - "and %[b_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - 
"psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "psrlh %[temp], %[b_vec], %[three] \n\t" + "and %[b_vec], %[temp], %[mask2] \n\t" + "psrlw %[temp], %[temp], %[seven] \n\t" + "psrlw %[r_vec], %[mask1], %[eight] \n\t" + "and %[r_vec], %[temp], %[r_vec] \n\t" + "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "paddb %[r_vec], %[three], %[six] \n\t" + "psrlw %[temp], %[temp], %[r_vec] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "paddb %[temp], %[three], %[eight] \n\t" + "psllw %[r_vec], %[r_vec], %[temp] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" - "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" - "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" + "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" + "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" + "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), 
[g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [three]"f"(0x3), [mask2]"f"(0x1f0000001f), - [eight]"f"(0x8), [seven]"f"(0x7), - [lmove5]"f"(0x5) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [dst_rgb565] "r"(dst_rgb565), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6), + [mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3), + [mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7), + [lmove5] "f"(0x5) + : "memory"); } void NV12ToARGBRow_MMI(const uint8_t* src_y, @@ -6937,91 +6890,83 @@ void NV12ToARGBRow_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], 
%[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[vshu] \n\t" - "pshufh %[u], %[u], %[ushu] \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "pshufh %[v], %[u], %[vshu] \n\t" + "pshufh %[u], %[u], %[ushu] \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh 
%[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b 
\n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1) + : "memory"); } void NV21ToARGBRow_MMI(const uint8_t* src_y, @@ -7031,91 +6976,83 @@ void NV21ToARGBRow_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], 
%[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[ushu] \n\t" - "pshufh %[u], %[u], %[vshu] \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "pshufh %[v], %[u], %[ushu] \n\t" + "pshufh %[u], %[u], %[vshu] \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" 
+ "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - 
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1) + : "memory"); } void NV12ToRGB24Row_MMI(const uint8_t* src_y, @@ -7125,103 +7062,95 @@ void NV12ToRGB24Row_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 
0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[vshu] \n\t" - "pshufh %[u], %[u], %[ushu] \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "pshufh %[v], %[u], %[vshu] \n\t" + "pshufh %[u], %[u], %[ushu] \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], 
%[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t" - "psllw %[temp], %[r_vec], %[lmove1] \n\t" - "or %[g_vec], %[g_vec], %[temp] \n\t" - "psrlw %[temp], %[r_vec], %[rmove1] \n\t" - "pextrh %[temp], %[temp], %[zero] \n\t" - "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[zero] \n\t" - "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[one] \n\t" - "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t" - "psllw %[b_vec], %[b_vec], %[rmove1] \n\t" - "or %[b_vec], %[b_vec], %[temp] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t" - "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t" + "psllw %[temp], %[r_vec], %[lmove1] \n\t" + "or %[g_vec], %[g_vec], %[temp] \n\t" + "psrlw %[temp], %[r_vec], %[rmove1] \n\t" + "pextrh %[temp], %[temp], %[zero] \n\t" + "pinsrh_2 %[g_vec], %[g_vec], 
%[temp] \n\t" + "pextrh %[temp], %[b_vec], %[zero] \n\t" + "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t" + "pextrh %[temp], %[b_vec], %[one] \n\t" + "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t" + "psllw %[b_vec], %[b_vec], %[rmove1] \n\t" + "or %[b_vec], %[b_vec], %[temp] \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t" + "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [lmove1]"f"(0x18), - [one]"f"(0x1), [rmove1]"f"(0x8) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), 
[lmove1] "f"(0x18), + [one] "f"(0x1), [rmove1] "f"(0x8) + : "memory"); } void NV21ToRGB24Row_MMI(const uint8_t* src_y, @@ -7231,103 +7160,95 @@ void NV21ToRGB24Row_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[ushu] \n\t" - "pshufh %[u], %[u], %[vshu] \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) 
\n\t" + "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "pshufh %[v], %[u], %[ushu] \n\t" + "pshufh %[u], %[u], %[vshu] \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw 
%[b_vec], %[b_vec], %[r_vec] \n\t" - "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t" - "psllw %[temp], %[r_vec], %[lmove1] \n\t" - "or %[g_vec], %[g_vec], %[temp] \n\t" - "psrlw %[temp], %[r_vec], %[rmove1] \n\t" - "pextrh %[temp], %[temp], %[zero] \n\t" - "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[zero] \n\t" - "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[one] \n\t" - "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t" - "psllw %[b_vec], %[b_vec], %[rmove1] \n\t" - "or %[b_vec], %[b_vec], %[temp] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t" - "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t" + "psllw %[temp], %[r_vec], %[lmove1] \n\t" + "or %[g_vec], %[g_vec], %[temp] \n\t" + "psrlw %[temp], %[r_vec], %[rmove1] \n\t" + "pextrh %[temp], %[temp], %[zero] \n\t" + "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t" + "pextrh %[temp], %[b_vec], %[zero] \n\t" + "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t" + "pextrh %[temp], %[b_vec], %[one] \n\t" + "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t" + "psllw %[b_vec], %[b_vec], %[rmove1] \n\t" + "or %[b_vec], %[b_vec], %[temp] \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t" + "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), 
[temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [lmove1]"f"(0x18), [rmove1]"f"(0x8), - [one]"f"(0x1) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [lmove1] "f"(0x18), + [rmove1] "f"(0x8), [one] "f"(0x1) + : "memory"); } void NV12ToRGB565Row_MMI(const uint8_t* src_y, @@ -7337,123 +7258,115 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 
0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[vshu] \n\t" - "pshufh %[u], %[u], %[ushu] \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "pshufh %[v], %[u], %[vshu] \n\t" + "pshufh %[u], %[u], %[ushu] \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], 
%[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "psrlh %[temp], %[g_vec], %[three] \n\t" - "and %[g_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psubb %[y], %[eight], %[three] \n\t"//5 - "psllw %[r_vec], %[r_vec], %[y] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "psrlh %[temp], %[g_vec], %[three] \n\t" + "and %[g_vec], %[temp], %[mask2] \n\t" + "psrlw %[temp], %[temp], %[seven] \n\t" + "psrlw %[r_vec], %[mask1], %[eight] \n\t" + "and %[r_vec], %[temp], %[r_vec] \n\t" + "psubb %[y], %[eight], %[three] \n\t" // 5 + "psllw %[r_vec], %[r_vec], %[y] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" + "paddb %[r_vec], %[three], %[six] \n\t" + 
"psrlw %[temp], %[temp], %[r_vec] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "paddb %[temp], %[three], %[eight] \n\t" + "psllw %[r_vec], %[r_vec], %[temp] \n\t" + "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "psrlh %[temp], %[b_vec], %[three] \n\t" - "and %[b_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psubb %[y], %[eight], %[three] \n\t"//5 - "psllw %[r_vec], %[r_vec], %[y] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "psrlh %[temp], %[b_vec], %[three] \n\t" + "and %[b_vec], %[temp], %[mask2] \n\t" + "psrlw %[temp], %[temp], %[seven] \n\t" + "psrlw %[r_vec], %[mask1], %[eight] \n\t" + "and %[r_vec], %[temp], %[r_vec] \n\t" + "psubb %[y], %[eight], %[three] \n\t" // 5 + "psllw %[r_vec], %[r_vec], %[y] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" + "paddb %[r_vec], %[three], %[six] \n\t" + "psrlw %[temp], %[temp], %[r_vec] \n\t" + "and %[r_vec], %[temp], %[mask2] \n\t" + "paddb %[temp], %[three], %[eight] \n\t" + "psllw %[r_vec], %[r_vec], %[temp] \n\t" + "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" - "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" - "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" + "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" + "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" + "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" - "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t" - 
"daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" + "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv), - [dst_rgb565]"r"(dst_rgb565), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [three]"f"(0x3), [mask2]"f"(0x1f0000001f), - [eight]"f"(0x8), [seven]"f"(0x7) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [dst_rgb565] "r"(dst_rgb565), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [three] "f"(0x3), + [mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7) + : "memory"); } void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2, @@ -7462,90 +7375,83 @@ void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 
0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t" - "psrlh %[temp], %[y], %[eight] \n\t" - "pshufh %[u], %[temp], %[ushu] \n\t" - "pshufh %[v], %[temp], %[vshu] \n\t" + "1: \n\t" + "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t" + "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t" + "psrlh %[temp], %[y], %[eight] \n\t" + "pshufh %[u], %[temp], %[ushu] \n\t" + "pshufh %[v], %[temp], %[vshu] \n\t" - "psrlh %[temp], %[mask1], %[eight] \n\t" - "and %[y], %[y], %[temp] \n\t" - "psllh %[temp], %[y], %[eight] \n\t" - "or %[y], %[y], %[temp] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "psrlh %[temp], %[mask1], %[eight] \n\t" + "and %[y], %[y], %[temp] \n\t" + "psllh %[temp], %[y], %[eight] \n\t" + "or %[y], %[y], %[temp] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], 
%[b_vec], %[six] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 
%[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [eight]"f"(0x8) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [yuy2_ptr] "r"(src_yuy2), [rgbbuf_ptr] "r"(rgb_buf), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8) + : "memory"); } void UYVYToARGBRow_MMI(const uint8_t* src_uyvy, @@ -7554,90 +7460,83 @@ void UYVYToARGBRow_MMI(const uint8_t* src_uyvy, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - 
"punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t" - "psrlh %[temp], %[mask1], %[eight] \n\t" - "and %[temp], %[y], %[temp] \n\t" - "pshufh %[u], %[temp], %[ushu] \n\t" - "pshufh %[v], %[temp], %[vshu] \n\t" + "1: \n\t" + "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t" + "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t" + "psrlh %[temp], %[mask1], %[eight] \n\t" + "and %[temp], %[y], %[temp] \n\t" + "pshufh %[u], %[temp], %[ushu] \n\t" + "pshufh %[v], %[temp], %[vshu] \n\t" - "psrlh %[y], %[y], %[eight] \n\t" - "psllh %[temp], %[y], %[eight] \n\t" - "or %[y], %[y], %[temp] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "psrlh %[y], %[y], %[eight] \n\t" + "psllh %[temp], %[y], %[eight] \n\t" + "or %[y], %[y], %[temp] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "paddsh 
%[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + "psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" + "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" + "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" 
- "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [eight]"f"(0x8) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [uyvy_ptr] "r"(src_uyvy), [rgbbuf_ptr] "r"(rgb_buf), + [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00), + [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00), + [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8) + : "memory"); } void I422ToRGBARow_MMI(const uint8_t* src_y, @@ -7648,112 +7547,104 @@ void I422ToRGBARow_MMI(const uint8_t* src_y, int width) { uint64_t y, u, v; uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; + uint64_t ub, ug, vg, vr, bb, bg, br, yg; __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh 
%[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" + "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" + "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" + "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" + "or %[ub], %[ub], %[mask1] \n\t" + "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" + "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[ug], %[ug], %[zero] \n\t" + "pshufh %[ug], %[ug], %[zero] \n\t" + "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vg], %[vg], %[zero] \n\t" + "pshufh %[vg], %[vg], %[five] \n\t" + "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" + "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" + "punpcklbh %[vr], %[vr], %[zero] \n\t" + "pshufh %[vr], %[vr], %[five] \n\t" + "or %[vr], %[vr], %[mask1] \n\t" - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" + "1: \n\t" + "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" + "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" + "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" + "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" + "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" + "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" + "punpcklbh %[y], %[y], %[y] \n\t" + "pmulhuh %[y], %[y], %[yg] \n\t" - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" + "punpcklbh %[u], %[u], %[u] \n\t" + "punpcklbh %[u], %[u], %[zero] \n\t" + "paddsh %[b_vec], %[y], %[bb] \n\t" + "pmullh %[temp], %[u], %[ub] \n\t" + 
"psubsh %[b_vec], %[b_vec], %[temp] \n\t" + "psrah %[b_vec], %[b_vec], %[six] \n\t" - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" + "punpcklbh %[v], %[v], %[v] \n\t" + "punpcklbh %[v], %[v], %[zero] \n\t" + "paddsh %[g_vec], %[y], %[bg] \n\t" + "pmullh %[temp], %[u], %[ug] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "pmullh %[temp], %[v], %[vg] \n\t" + "psubsh %[g_vec], %[g_vec], %[temp] \n\t" + "psrah %[g_vec], %[g_vec], %[six] \n\t" - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" + "paddsh %[r_vec], %[y], %[br] \n\t" + "pmullh %[temp], %[v], %[vr] \n\t" + "psubsh %[r_vec], %[r_vec], %[temp] \n\t" + "psrah %[r_vec], %[r_vec], %[six] \n\t" - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t" - "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t" - "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" + "packushb %[g_vec], %[g_vec], %[zero] \n\t" + "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t" + "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t" + "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t" + "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" + "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" + "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" + "gssdlc1 
%[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" + "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" + "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" + "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" + "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" + "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" + "daddi %[width], %[width], -0x04 \n\t" + "bnez %[width], 1b \n\t" - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [alpha]"f"(-1) - : "memory" - ); + : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec), + [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp), + [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), + [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg) + : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v), + [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants), + [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6), + [mask1] "f"(0xff00ff00ff00ff00), [alpha] "f"(-1) + : "memory"); } void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) { - __asm__ volatile ( - "punpcklwd %[v32], %[v32], %[v32] \n\t" - "1: \n\t" - "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t" + __asm__ volatile( + "punpcklwd %[v32], %[v32], %[v32] \n\t" + "1: 
\n\t" + "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t" + "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t" + "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t" + "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "bnez %[width], 1b \n\t" - : [v32]"+&f"(v32) - : [dst_ptr]"r"(dst_argb), [width]"r"(width) - : "memory" - ); + "daddi %[width], %[width], -0x04 \n\t" + "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" + "bnez %[width], 1b \n\t" + : [v32] "+&f"(v32) + : [dst_ptr] "r"(dst_argb), [width] "r"(width) + : "memory"); } // 10 bit YUV to ARGB diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 321652b7b..7846f541a 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -676,6 +676,7 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) YALIGN, benchmark_width_, _Opt, +, 0) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1) @@ -683,7 +684,6 @@ TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(U420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1) -TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1) TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1) @@ -700,20 +700,23 @@ TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1) TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1) #endif TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(H422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(U422, 2, 1, ARGB, 4, 4, 1) -//TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(U422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) -TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) 
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) -//TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) -TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(H444, 1, 1, ABGR, 4, 4, 1) +TESTPLANARTOB(U444, 1, 1, ARGB, 4, 4, 1) +TESTPLANARTOB(U444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) @@ -2569,6 +2572,7 @@ TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12) benchmark_width_, _Opt, +, 0, FMT_C, BPP_C) TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(J420, 2, 2, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(J420, 2, 2, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, ARGB, 4) @@ -2576,7 +2580,6 @@ TESTPLANARTOE(H420, 2, 2, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(U420, 2, 2, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(U420, 2, 2, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I420, 2, 2, BGRA, 1, 4, ARGB, 4) -TESTPLANARTOE(I420, 2, 2, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I420, 2, 2, RGBA, 1, 4, ARGB, 4) TESTPLANARTOE(I420, 2, 2, RGB24, 1, 3, ARGB, 4) TESTPLANARTOE(I420, 2, 2, RAW, 1, 3, RGB24, 3) @@ -2594,16 +2597,24 @@ TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2) TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2) TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2) #endif +TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4) TESTPLANARTOE(H422, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U422, 2, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I422, 2, 1, BGRA, 1, 4, ARGB, 4) 
-TESTPLANARTOE(I422, 2, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I422, 2, 1, RGBA, 1, 4, ARGB, 4) -TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ARGB, 4) -TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(I444, 1, 1, ARGB, 1, 4, ABGR, 4) TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(J444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(H444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(H444, 1, 1, ABGR, 1, 4, ARGB, 4) +TESTPLANARTOE(U444, 1, 1, ARGB, 1, 4, ARGB, 4) +TESTPLANARTOE(U444, 1, 1, ABGR, 1, 4, ARGB, 4) TESTPLANARTOE(I420, 2, 2, YUY2, 2, 4, ARGB, 4) TESTPLANARTOE(I420, 2, 2, UYVY, 2, 4, ARGB, 4) TESTPLANARTOE(I422, 2, 1, YUY2, 2, 4, ARGB, 4)