RGB24Mirror function

Bug: b/151960427 Change-Id: I413db0011a4ed87eefc0dd166bb8e076b5aa4b1d Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2116639 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: richard winterton <rrwinterton@gmail.com>
2025-12-06 16:56:55 +08:00 · 2020-03-24 12:07:52 -07:00 · 2020-03-24 12:07:52 -07:00 · aabcc477bd
commit aabcc477bd
parent 7f00d67d7c
16 changed files with 1614 additions and 1529 deletions
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@ -313,6 +313,18 @@ int ARGBMirror(const uint8_t* src_argb,
               int width,
               int height);

+// Alias: RGB24ToRGB24Mirror is another name for RGB24Mirror.
+#define RGB24ToRGB24Mirror RGB24Mirror
+
+// Mirror an RGB24 image left to right. Negative height inverts the image.
+LIBYUV_API
+int RGB24Mirror(const uint8_t* src_rgb24,
+               int src_stride_rgb24,
+               uint8_t* dst_rgb24,
+               int dst_stride_rgb24,
+               int width,
+               int height);
+
 // Mirror a plane of data.
 LIBYUV_API
 void MirrorPlane(const uint8_t* src_y,
--- a/include/libyuv/rotate.h
+++ b/include/libyuv/rotate.h
@ -83,6 +83,20 @@ int NV12ToI420Rotate(const uint8_t* src_y,
                     int height,
                     enum RotationMode mode);

+// Rotate NV12 input and store in NV12.
+LIBYUV_API
+int NV12Rotate(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_uv,
+               int src_stride_uv,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int width,
+               int height,
+               enum RotationMode mode);
+
 // Rotate a plane by 0, 90, 180, or 270.
 LIBYUV_API
 int RotatePlane(const uint8_t* src,
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@ -279,6 +279,7 @@ extern "C" {
 #define HAS_I422TOAR30ROW_SSSE3
 #define HAS_MERGERGBROW_SSSE3
 #define HAS_RAWTORGBAROW_SSSE3
+#define HAS_RGB24MIRRORROW_SSSE3
 #define HAS_RGBATOYJROW_SSSE3
 #define HAS_SPLITRGBROW_SSSE3
 #define HAS_SWAPUVROW_SSSE3
@ -408,6 +409,7 @@ extern "C" {
 #define HAS_ARGBCOLORMATRIXROW_NEON
 #define HAS_ARGBGRAYROW_NEON
 #define HAS_ARGBMIRRORROW_NEON
+#define HAS_RGB24MIRRORROW_NEON
 #define HAS_ARGBMULTIPLYROW_NEON
 #define HAS_ARGBQUANTIZEROW_NEON
 #define HAS_ARGBSEPIAROW_NEON
@ -1196,7 +1198,9 @@ void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 void RGB24ToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
-void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
 void RAWToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@ -1608,6 +1612,16 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
 void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);

+void RGB24MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void RGB24MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void RGB24MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              int width);
+void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             int width);
+
 void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
--- a/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
@ -86,10 +86,14 @@ enum FourCC {
  FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
-  FOURCC_J420 = FOURCC('J', '4', '2', '0'),  // jpeg (bt.601 full), unofficial fourcc
-  FOURCC_J422 = FOURCC('J', '4', '2', '2'),  // jpeg (bt.601 full), unofficial fourcc
-  FOURCC_J444 = FOURCC('J', '4', '4', '4'),  // jpeg (bt.601 full), unofficial fourcc
-  FOURCC_J400 = FOURCC('J', '4', '0', '0'),  // jpeg (bt.601 full), unofficial fourcc
+  FOURCC_J420 =
+      FOURCC('J', '4', '2', '0'),  // jpeg (bt.601 full), unofficial fourcc
+  FOURCC_J422 =
+      FOURCC('J', '4', '2', '2'),  // jpeg (bt.601 full), unofficial fourcc
+  FOURCC_J444 =
+      FOURCC('J', '4', '4', '4'),  // jpeg (bt.601 full), unofficial fourcc
+  FOURCC_J400 =
+      FOURCC('J', '4', '0', '0'),  // jpeg (bt.601 full), unofficial fourcc
  FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // bt.709, unofficial fourcc
  FOURCC_H422 = FOURCC('H', '4', '2', '2'),  // bt.709, unofficial fourcc
  FOURCC_H444 = FOURCC('H', '4', '4', '4'),  // bt.709, unofficial fourcc
--- a/source/convert.cc
+++ b/source/convert.cc
@ -1599,8 +1599,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
                int height) {
  int y;
 #if (defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
-     defined(HAS_RGB24TOYJROW_MSA) || \
-     defined(HAS_RGB24TOYJROW_MMI)
+    defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)
  void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
                        uint8_t* dst_u, uint8_t* dst_v, int width) =
      RGB24ToUVJRow_C;
@ -1748,8 +1747,7 @@ int RAWToI420(const uint8_t* src_raw,
              int height) {
  int y;
 #if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
-     defined(HAS_RAWTOYROW_MSA) || \
-     defined(HAS_RAWTOYROW_MMI)
+    defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI)
  void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
                     uint8_t* dst_v, int width) = RAWToUVRow_C;
  void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@ -1270,9 +1270,6 @@ static int I210ToARGBMatrix(const uint16_t* src_y,
  return 0;
 }

-
-
-
 // Convert I210 to ARGB.
 LIBYUV_API
 int I210ToARGB(const uint16_t* src_y,
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@ -1132,7 +1132,7 @@ int ARGBMirror(const uint8_t* src_argb,
 #if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
-    if (IS_ALIGNED(width, 4)) {
+    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
@ -1179,6 +1179,52 @@ int ARGBMirror(const uint8_t* src_argb,
  return 0;
 }

+// Mirror an RGB24 image left to right. Returns 0, or -1 on invalid arguments.
+LIBYUV_API
+int RGB24Mirror(const uint8_t* src_rgb24,
+               int src_stride_rgb24,
+               uint8_t* dst_rgb24,
+               int dst_stride_rgb24,
+               int width,
+               int height) {
+  int y;
+  void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
+      RGB24MirrorRow_C;  // default when no SIMD row function is selected
+  if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
+    return -1;  // NULL buffer or empty image
+  }
+  // Negative height means invert the image: source rows are read bottom up.
+  if (height < 0) {
+    height = -height;
+    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;  // last row
+    src_stride_rgb24 = -src_stride_rgb24;
+  }
+#if defined(HAS_RGB24MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {  // direct version only for multiples of 16
+      RGB24MirrorRow = RGB24MirrorRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_RGB24MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {  // direct version only for multiples of 16
+      RGB24MirrorRow = RGB24MirrorRow_SSSE3;
+    }
+  }
+#endif
+
+  // Mirror each row; both strides are byte offsets between rows.
+  for (y = 0; y < height; ++y) {
+    RGB24MirrorRow(src_rgb24, dst_rgb24, width);
+    src_rgb24 += src_stride_rgb24;
+    dst_rgb24 += dst_stride_rgb24;
+  }
+  return 0;
+}
+
 // Get a blender that optimized for the CPU and pixel count.
 // As there are 6 blenders to choose from, the caller should try to use
 // the same blend function for all pixels if possible.
--- a/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@ -126,7 +126,7 @@ static int ARGBRotate180(const uint8_t* src_argb,
 #if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
-    if (IS_ALIGNED(width, 4)) {
+    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
--- a/source/row_any.cc
+++ b/source/row_any.cc
@ -1189,7 +1189,7 @@ ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
 ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
 #endif
 #ifdef HAS_ARGBMIRRORROW_NEON
-ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
+ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 15)
 #endif
 #ifdef HAS_ARGBMIRRORROW_MSA
 ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
@ -1197,6 +1197,12 @@ ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
 #ifdef HAS_ARGBMIRRORROW_MMI
 ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1)
 #endif
+#ifdef HAS_RGB24MIRRORROW_SSSE3
+ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
+#endif
+#ifdef HAS_RGB24MIRRORROW_NEON
+ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
+#endif
 #undef ANY11M

 // Any 1 plane. (memset)
--- a/source/row_common.cc
+++ b/source/row_common.cc
@ -2201,6 +2201,22 @@ void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  }
 }

+void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
+                      int width) {  // mirrors width pixels of 3 bytes each
+  int x;
+  src_rgb24 += width * 3 - 3;  // point at the last source pixel
+  for (x = 0; x < width; ++x) {
+    uint8_t b = src_rgb24[0];  // read all 3 bytes before writing any
+    uint8_t g = src_rgb24[1];
+    uint8_t r = src_rgb24[2];
+    dst_rgb24[0] = b;  // byte order inside the pixel is unchanged
+    dst_rgb24[1] = g;
+    dst_rgb24[2] = r;
+    src_rgb24 -= 3;  // source walks backwards one pixel
+    dst_rgb24 += 3;  // destination walks forwards one pixel
+  }
+}
+
 void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
@ -3427,8 +3443,8 @@ void GaussCol_C(const uint16_t* src0,
 void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
-    *dst++ =
-        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) * (1.0f / 256.0f);
+    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
+             (1.0f / 256.0f);
    ++src;
  }
 }
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@ -3262,6 +3262,52 @@ void MirrorUVRow_SSSE3(const uint8_t* src,
 }
 #endif  // HAS_MIRRORUVROW_SSSE3

+#ifdef HAS_RGB24MIRRORROW_SSSE3
+
+// Mirror the 5 pixels in bytes 0..14 into bytes 1..15; byte 0 becomes zero.
+static const uvec8 kShuffleMirrorRGB0 = {128u, 12u, 13u, 14u, 9u, 10u, 11u, 6u,
+                                         7u, 8u, 3u, 4u, 5u, 0u, 1u, 2u};
+
+// Mirror the 5 pixels in bytes 1..15 into bytes 0..14; byte 15 becomes zero.
+static const uvec8 kShuffleMirrorRGB1 = {13u, 14u, 15u, 10u, 11u, 12u, 7u,
+                                         8u, 9u, 4u, 5u, 6u, 1u, 2u, 3u, 128u};
+
+// Mirror 16 pixels (48 bytes) per loop as three 5 pixel shuffles plus 1 pixel.
+void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
+                          int width) {
+  intptr_t temp_width = (intptr_t)(width);
+  src_rgb24 += width * 3 - 48;  // last 48 bytes of the source row
+  asm volatile(
+      "movdqa    %3,%%xmm4                       \n"  // kShuffleMirrorRGB0
+      "movdqa    %4,%%xmm5                       \n"  // kShuffleMirrorRGB1
+
+      LABELALIGN
+      "1:                                        \n"
+      "movdqu    (%0),%%xmm0                     \n"  // first 5 (pixels 0..4)
+      "movdqu    15(%0),%%xmm1                   \n"  // next 5 (pixels 5..9)
+      "movdqu    30(%0),%%xmm2                   \n"  // next 5 (pixels 10..14)
+      "movdqu    32(%0),%%xmm3                   \n"  // last 1 special (bytes 32..47)
+      "pshufb    %%xmm4,%%xmm0                   \n"
+      "pshufb    %%xmm4,%%xmm1                   \n"
+      "pshufb    %%xmm4,%%xmm2                   \n"
+      "pshufb    %%xmm5,%%xmm3                   \n"
+      "lea       -0x30(%0),%0                    \n"  // source back 48 bytes
+      "movdqu    %%xmm0,32(%1)                   \n" // last 5; zero byte 32 is overwritten next
+      "movdqu    %%xmm1,17(%1)                   \n" // next 5; zero byte 17 is overwritten next
+      "movdqu    %%xmm2,2(%1)                    \n" // next 5; zero byte 2 is overwritten next
+      "movlpd    %%xmm3,0(%1)                    \n" // first 1; writes only the low 8 bytes
+      "lea       0x30(%1),%1                     \n"  // destination forward 48 bytes
+      "sub       $0x10,%2                        \n"  // 16 pixels done
+      "jg        1b                              \n"  // loop while pixels remain
+      : "+r"(src_rgb24),  // %0
+        "+r"(dst_rgb24),  // %1
+        "+r"(temp_width)  // %2
+      : "m"(kShuffleMirrorRGB0), // %3
+        "m"(kShuffleMirrorRGB1)  // %4
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif   // HAS_RGB24MIRRORROW_SSSE3
+
 #ifdef HAS_ARGBMIRRORROW_SSE2

 void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
--- a/source/row_mmi.cc
+++ b/source/row_mmi.cc
@ -6048,7 +6048,9 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])             \n\t"  // yg
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])             \n\t"  // bb
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])             \n\t"  // ub
-    "or         %[ub],           %[ub],             %[mask]       \n\t"//must sign extension
+      "or         %[ub],           %[ub],             %[mask]       \n\t"  // must
+                                                                           // sign
+                                                                           // extension
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])             \n\t"  // bg
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])             \n\t"  // ug
      "punpcklbh  %[ug],           %[ug],             %[zero]       \n\t"
@ -6060,7 +6062,8 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])             \n\t"  // vr
      "punpcklbh  %[vr],           %[vr],             %[zero]       \n\t"
      "pshufh     %[vr],           %[vr],             %[five]       \n\t"
-    "or         %[vr],           %[vr],             %[mask]       \n\t"//sign extension
+      "or         %[vr],           %[vr],             %[mask]       \n\t"  // sign
+                                                                           // extension

      "1:                                                           \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])                   \n\t"
@ -6110,23 +6113,17 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
      "daddiu     %[rgbbuf_ptr],   %[rgbbuf_ptr],     0x10          \n\t"
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"
-    : [y]"=&f"(y),
-      [u]"=&f"(u),                         [v]"=&f"(v),
-      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
-      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
-      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
+        [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
+        [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
+        [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
+        [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
        [br] "=&f"(br), [yg] "=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [alpha]"f"(-1),
-      [six]"f"(0x6),                       [five]"f"(0x55),
-      [mask]"f"(mask)
-    : "memory"
-  );
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
+        [five] "f"(0x55), [mask] "f"(mask)
+      : "memory");
 }

 // Also used for 420
@ -6145,7 +6142,9 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
      "ldc1       %[yg],           0xc0(%[yuvcons_ptr])             \n\t"  // yg
      "ldc1       %[bb],           0x60(%[yuvcons_ptr])             \n\t"  // bb
      "ldc1       %[ub],           0x00(%[yuvcons_ptr])             \n\t"  // ub
-    "or         %[ub],           %[ub],             %[mask]       \n\t"//must sign extension
+      "or         %[ub],           %[ub],             %[mask]       \n\t"  // must
+                                                                           // sign
+                                                                           // extension
      "ldc1       %[bg],           0x80(%[yuvcons_ptr])             \n\t"  // bg
      "ldc1       %[ug],           0x20(%[yuvcons_ptr])             \n\t"  // ug
      "punpcklbh  %[ug],           %[ug],             %[zero]       \n\t"
@ -6157,7 +6156,8 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
      "ldc1       %[vr],           0x40(%[yuvcons_ptr])             \n\t"  // vr
      "punpcklbh  %[vr],           %[vr],             %[zero]       \n\t"
      "pshufh     %[vr],           %[vr],             %[five]       \n\t"
-    "or         %[vr],           %[vr],             %[mask]       \n\t"//sign extension
+      "or         %[vr],           %[vr],             %[mask]       \n\t"  // sign
+                                                                           // extension

      "1:                                                           \n\t"
      "gslwlc1    %[y],            0x03(%[y_ptr])                   \n\t"
@ -6212,23 +6212,17 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),
-      [u]"=&f"(u),                         [v]"=&f"(v),
-      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
-      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
-      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
+        [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
+        [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
+        [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
+        [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
        [br] "=&f"(br), [yg] "=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [alpha]"f"(-1),
-      [six]"f"(0x6),                       [five]"f"(0x55),
-      [mask]"f"(mask)
-    : "memory"
-  );
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
+        [five] "f"(0x55), [mask] "f"(mask)
+      : "memory");
 }

 // 10 bit YUV to ARGB
@ -6316,24 +6310,18 @@ void I210ToARGBRow_MMI(const uint16_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),
-      [u]"=&f"(u),                         [v]"=&f"(v),
-      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
-      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
-      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
+        [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
+        [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
+        [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
+        [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
        [br] "=&f"(br), [yg] "=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [alpha]"f"(-1),
-      [six]"f"(0x6),                       [five]"f"(0x55),
-      [mask]"f"(mask),                     [two]"f"(0x02),
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
+        [five] "f"(0x55), [mask] "f"(mask), [two] "f"(0x02),
        [mask1] "f"(0x00ff00ff00ff00ff)
-    : "memory"
-  );
+      : "memory");
 }

 void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
@ -6422,23 +6410,17 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),                         [a]"=&f"(a),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [a] "=&f"(a),
        [b_vec0] "=&f"(b_vec[0]), [b_vec1] "=&f"(b_vec[1]),
        [g_vec0] "=&f"(g_vec[0]), [g_vec1] "=&f"(g_vec[1]),
-      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [a_ptr]"r"(src_a),                   [zero]"f"(0x00),
-      [six]"f"(0x6),                       [five]"f"(0x55),
-      [mask]"f"(mask)
-    : "memory"
-  );
+        [r_vec0] "=&f"(r_vec[0]), [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub),
+        [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb),
+        [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [a_ptr] "r"(src_a), [zero] "f"(0x00),
+        [six] "f"(0x6), [five] "f"(0x55), [mask] "f"(mask)
+      : "memory");
 }

 void I422ToRGB24Row_MMI(const uint8_t* src_y,
@ -6528,7 +6510,6 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
      "gsswlc1    %[g_vec1],       0x0b(%[rgbbuf_ptr])              \n\t"
      "gsswrc1    %[g_vec1],       0x08(%[rgbbuf_ptr])              \n\t"

-
      "daddiu     %[y_ptr],        %[y_ptr],          0x04          \n\t"
      "daddiu     %[u_ptr],        %[u_ptr],          0x02          \n\t"
      "daddiu     %[v_ptr],        %[v_ptr],          0x02          \n\t"
@ -6536,24 +6517,17 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec0]"=&f"(b_vec[0]),             [b_vec1]"=&f"(b_vec[1]),
-      [g_vec0]"=&f"(g_vec[0]),             [g_vec1]"=&f"(g_vec[1]),
-      [r_vec0]"=&f"(r_vec[0]),             [r_vec1]"=&f"(r_vec[1]),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
+        [b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
+        [g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
+        [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
+        [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
        [br] "=&f"(br), [yg] "=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask]"f"(mask),
-      [lmove1]"f"(0x18),                   [rmove1]"f"(0x8),
-      [one]"f"(0x1)
-    : "memory"
-  );
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
+        [mask] "f"(mask), [lmove1] "f"(0x18), [rmove1] "f"(0x8), [one] "f"(0x1)
+      : "memory");
 }

 void I422ToARGB4444Row_MMI(const uint8_t* src_y,
@ -6651,23 +6625,16 @@ void I422ToARGB4444Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [dst_argb4444]"r"(dst_argb4444),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask]"f"(0xff00ff00ff00ff00),
-      [four]"f"(0x4),                      [mask1]"f"(0xf0f0f0f0f0f0f0f0),
-      [alpha]"f"(-1)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [dst_argb4444] "r"(dst_argb4444), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
+        [mask] "f"(0xff00ff00ff00ff00), [four] "f"(0x4),
+        [mask1] "f"(0xf0f0f0f0f0f0f0f0), [alpha] "f"(-1)
+      : "memory");
 }

 void I422ToARGB1555Row_MMI(const uint8_t* src_y,
@ -6779,24 +6746,17 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [dst_argb1555]"r"(dst_argb1555),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [three]"f"(0x3),                     [mask2]"f"(0x1f0000001f),
-      [eight]"f"(0x8),                     [mask3]"f"(0x800000008000),
-      [lmove5]"f"(0x5)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [dst_argb1555] "r"(dst_argb1555), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
+        [mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3),
+        [mask2] "f"(0x1f0000001f), [eight] "f"(0x8),
+        [mask3] "f"(0x800000008000), [lmove5] "f"(0x5)
+      : "memory");
 }

 void I422ToRGB565Row_MMI(const uint8_t* src_y,
@ -6910,24 +6870,17 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [dst_rgb565]"r"(dst_rgb565),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [three]"f"(0x3),                     [mask2]"f"(0x1f0000001f),
-      [eight]"f"(0x8),                     [seven]"f"(0x7),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [dst_rgb565] "r"(dst_rgb565), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
+        [mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3),
+        [mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7),
        [lmove5] "f"(0x5)
-    : "memory"
-  );
+      : "memory");
 }

 void NV12ToARGBRow_MMI(const uint8_t* src_y,
@ -7005,23 +6958,15 @@ void NV12ToARGBRow_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [uv_ptr]"r"(src_uv),
-      [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [alpha]"f"(-1)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf),
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1)
+      : "memory");
 }

 void NV21ToARGBRow_MMI(const uint8_t* src_y,
@ -7099,23 +7044,15 @@ void NV21ToARGBRow_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [vu_ptr]"r"(src_vu),
-      [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [alpha]"f"(-1)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf),
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1)
+      : "memory");
 }

 void NV12ToRGB24Row_MMI(const uint8_t* src_y,
@ -7204,24 +7141,16 @@ void NV12ToRGB24Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [uv_ptr]"r"(src_uv),
-      [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [alpha]"f"(-1),                      [lmove1]"f"(0x18),
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf),
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [lmove1] "f"(0x18),
        [one] "f"(0x1), [rmove1] "f"(0x8)
-    : "memory"
-  );
+      : "memory");
 }

 void NV21ToRGB24Row_MMI(const uint8_t* src_y,
@ -7310,24 +7239,16 @@ void NV21ToRGB24Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [vu_ptr]"r"(src_vu),
-      [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [lmove1]"f"(0x18),                   [rmove1]"f"(0x8),
-      [one]"f"(0x1)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf),
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [lmove1] "f"(0x18),
+        [rmove1] "f"(0x8), [one] "f"(0x1)
+      : "memory");
 }

 void NV12ToRGB565Row_MMI(const uint8_t* src_y,
@ -7436,24 +7357,16 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [uv_ptr]"r"(src_uv),
-      [dst_rgb565]"r"(dst_rgb565),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [three]"f"(0x3),                     [mask2]"f"(0x1f0000001f),
-      [eight]"f"(0x8),                     [seven]"f"(0x7)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [dst_rgb565] "r"(dst_rgb565),
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [three] "f"(0x3),
+        [mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7)
+      : "memory");
 }

 void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
@ -7530,22 +7443,15 @@ void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
      : [yuy2_ptr] "r"(src_yuy2), [rgbbuf_ptr] "r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [alpha]"f"(-1),                      [eight]"f"(0x8)
-    : "memory"
-  );
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8)
+      : "memory");
 }

 void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
@ -7622,22 +7528,15 @@ void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
      : [uyvy_ptr] "r"(src_uyvy), [rgbbuf_ptr] "r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [ushu]"f"(0xA0),                     [vshu]"f"(0xf5),
-      [alpha]"f"(-1),                      [eight]"f"(0x8)
-    : "memory"
-  );
+        [yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
+        [five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
+        [ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8)
+      : "memory");
 }

 void I422ToRGBARow_MMI(const uint8_t* src_y,
@ -7720,22 +7619,15 @@ void I422ToRGBARow_MMI(const uint8_t* src_y,
      "daddi      %[width],        %[width],          -0x04         \n\t"
      "bnez       %[width],        1b                               \n\t"

-    : [y]"=&f"(y),                         [u]"=&f"(u),
-      [v]"=&f"(v),
-      [b_vec]"=&f"(b_vec),                 [g_vec]"=&f"(g_vec),
-      [r_vec]"=&f"(r_vec),                 [temp]"=&f"(temp),
-      [ub]"=&f"(ub),                       [ug]"=&f"(ug),
-      [vg]"=&f"(vg),                       [vr]"=&f"(vr),
-      [bb]"=&f"(bb),                       [bg]"=&f"(bg),
-      [br]"=&f"(br),                       [yg]"=&f"(yg)
-    : [y_ptr]"r"(src_y),                   [u_ptr]"r"(src_u),
-      [v_ptr]"r"(src_v),                   [rgbbuf_ptr]"r"(rgb_buf),
-      [yuvcons_ptr]"r"(yuvconstants),      [width]"r"(width),
-      [zero]"f"(0x00),                     [five]"f"(0x55),
-      [six]"f"(0x6),                       [mask1]"f"(0xff00ff00ff00ff00),
-      [alpha]"f"(-1)
-    : "memory"
-  );
+      : [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
+        [g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
+        [ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
+        [bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
+      : [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
+        [rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
+        [width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
+        [mask1] "f"(0xff00ff00ff00ff00), [alpha] "f"(-1)
+      : "memory");
 }

 void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
@ -7752,8 +7644,7 @@ void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
      "bnez       %[width],        1b                               \n\t"
      : [v32] "+&f"(v32)
      : [dst_ptr] "r"(dst_argb), [width] "r"(width)
-    : "memory"
-  );
+      : "memory");
 }

 // 10 bit YUV to ARGB
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@ -727,14 +727,10 @@ void MirrorUVRow_NEON(const uint8_t* src_uv,
 }

 void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+  src += width * 4 - 16;
  asm volatile(
-      // Start at end of source row.
-      "mov        r3, #-16                       \n"
-      "add        %0, %0, %2, lsl #2             \n"
-      "sub        %0, #16                        \n"
-
      "1:                                        \n"
-      "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
+      "vld1.8     {q0}, [%0], %3                 \n"  // src -= 16
      "subs       %2, #4                         \n"  // 4 pixels per loop.
      "vrev64.32  q0, q0                         \n"
      "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
@ -743,12 +739,30 @@ void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
      : "+r"(src),   // %0
        "+r"(dst),   // %1
        "+r"(width)  // %2
-      :
-      : "cc", "memory", "r3", "q0");
+      : "r"(-16)     // %3
+      : "cc", "memory", "q0");
 }

-void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
-                         uint8_t* dst_argb,
+void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
+                         int width) {  // Copies a row of 3-byte pixels in reverse pixel order.
+  src_rgb24 += width * 3 - 24;  // Point at the last 24 bytes (8 pixels) of the source row.
+  asm volatile(
+      "1:                                        \n"
+      "vld3.8     {d0, d1, d2}, [%0], %3         \n"  // src -= 24; de-interleaves 8 pixels into 3 byte lanes
+      "subs       %2, #8                         \n"  // 8 pixels per loop.
+      "vrev64.8   d0, d0                         \n"  // reverse the 8 first-bytes
+      "vrev64.8   d1, d1                         \n"  // reverse the 8 second-bytes
+      "vrev64.8   d2, d2                         \n"  // reverse the 8 third-bytes
+      "vst3.8     {d0, d1, d2}, [%1]!            \n"  // dst += 24
+      "bgt        1b                             \n"  // repeat while width > 0. NOTE(review): presumably width is a positive multiple of 8 - confirm against callers
+      : "+r"(src_rgb24),  // %0
+        "+r"(dst_rgb24),  // %1
+        "+r"(width)       // %2
+      : "r"(-24)          // %3 post-index step: walk the source backwards
+      : "cc", "memory", "d0", "d1", "d2");
+}
+
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb,
                         int width) {
  asm volatile(
      "vmov.u8    d4, #255                       \n"  // Alpha
@ -2065,7 +2079,6 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
      : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
 }

-
 void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  asm volatile(
      "vmov.u8    d4, #29                        \n"  // B * 0.1140 coefficient
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@ -278,7 +278,8 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
          v20) "subs       %w4, %w4, #8                   \n" ARGBTORGB565
               "st1        {v0.8h}, [%3], #16             \n"  // store 8 pixels
                                                               // RGB565.
-               "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
+               "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7
+                                                               // lines ahead
               "b.gt       1b                             \n"
      : "+r"(src_y),       // %0
        "+r"(src_u),       // %1
@ -316,7 +317,8 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
          v20) "subs       %w4, %w4, #8                   \n" ARGBTOARGB1555
               "st1        {v0.8h}, [%3], #16             \n"  // store 8 pixels
                                                               // RGB565.
-               "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
+               "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7
+                                                               // lines ahead
               "b.gt       1b                             \n"
      : "+r"(src_y),         // %0
        "+r"(src_u),         // %1
@ -530,7 +532,8 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y,
          v20) "subs       %w3, %w3, #8                   \n" ARGBTORGB565
               "st1        {v0.8h}, [%2], 16              \n"  // store 8 pixels
                                                               // RGB565.
-               "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
+               "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7
+                                                               // lines ahead
               "b.gt       1b                             \n"
      : "+r"(src_y),       // %0
        "+r"(src_uv),      // %1
@ -750,7 +753,6 @@ void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
      "tbl        v1.16b, {v1.16b}, v3.16b       \n"
      "tbl        v0.16b, {v2.16b}, v3.16b       \n"
      "st1        {v0.16b, v1.16b}, [%1], #32    \n"  // store 32 pixels
-      "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
      "b.gt       1b                             \n"
      : "+r"(src),            // %0
        "+r"(dst),            // %1
@ -775,7 +777,6 @@ void MirrorUVRow_NEON(const uint8_t* src_uv,
      "rev64      v1.8b, v1.8b                   \n"
      "st1        {v0.8b}, [%1], #8              \n"  // dst += 8
      "st1        {v1.8b}, [%2], #8              \n"
-      "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
      "b.gt       1b                             \n"
      : "+r"(src_uv),        // %0
        "+r"(dst_u),         // %1
@ -785,24 +786,49 @@ void MirrorUVRow_NEON(const uint8_t* src_uv,
      : "cc", "memory", "v0", "v1");
 }

-void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb,
+                         int width) {  // Copies a row of 4-byte pixels in reverse pixel order.
  asm volatile(
-      // Start at end of source row.
-      "add        %0, %0, %w2, sxtw #2           \n"
-      "sub        %0, %0, #16                    \n"
+      "ld1        {v4.16b}, [%4]                 \n"  // shuffler: tbl indices read from kShuffleMirror
+      "add        %0, %0, %w2, sxtw #2           \n"  // Start at end of row (src += width * 4).
+      "sub        %0, %0, #64                    \n"  // back up 64 bytes so the first load covers the last 16 pixels
      "1:                                        \n"
-      "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
-      "subs       %w2, %w2, #4                   \n"  // 4 pixels per loop.
-      "rev64      v0.4s, v0.4s                   \n"
-      "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
-      "st1        {v0.D}[0], [%1], #8            \n"
-      "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
+      "ld4        {v0.16b, v1.16b, v2.16b, v3.16b}, [%0], %3\n"  // src -= 64; de-interleaves 16 pixels into 4 byte lanes
+      "subs       %w2, %w2, #16                  \n"  // 16 pixels per loop.
+      "tbl        v0.16b, {v0.16b}, v4.16b       \n"  // permute lane 0 by the shuffler (presumably a 16-byte reversal; table defined elsewhere)
+      "tbl        v1.16b, {v1.16b}, v4.16b       \n"  // same permutation for lane 1
+      "tbl        v2.16b, {v2.16b}, v4.16b       \n"  // same permutation for lane 2
+      "tbl        v3.16b, {v3.16b}, v4.16b       \n"  // same permutation for lane 3
+      "st4        {v0.16b, v1.16b, v2.16b, v3.16b}, [%1], #64 \n"  // dst += 64; re-interleaves the 4 lanes
      "b.gt       1b                             \n"
-      : "+r"(src),           // %0
-        "+r"(dst),           // %1
+      : "+r"(src_argb),      // %0
+        "+r"(dst_argb),      // %1
        "+r"(width)           // %2
-      : "r"((ptrdiff_t)-16)  // %3
-      : "cc", "memory", "v0");
+      : "r"((ptrdiff_t)-64),  // %3 post-index step: walk the source backwards
+        "r"(&kShuffleMirror)  // %4 address of the tbl index table
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4");  // NOTE(review): loop presumably needs width to be a positive multiple of 16 - confirm against callers
+}
+
+void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
+                         int width) {  // Copies a row of 3-byte pixels in reverse pixel order.
+  src_rgb24 += width * 3 - 48;  // Point at the last 48 bytes (16 pixels) of the source row.
+  asm volatile(
+      "ld1        {v3.16b}, [%4]                 \n"  // shuffler: tbl indices read from kShuffleMirror
+
+      "1:                                        \n"
+      "ld3        {v0.16b, v1.16b, v2.16b}, [%0], %3\n"  // src -= 48; de-interleaves 16 pixels into 3 byte lanes
+      "subs       %w2, %w2, #16                  \n"  // 16 pixels per loop.
+      "tbl        v0.16b, {v0.16b}, v3.16b       \n"  // permute lane 0 by the shuffler (presumably a 16-byte reversal; table defined elsewhere)
+      "tbl        v1.16b, {v1.16b}, v3.16b       \n"  // same permutation for lane 1
+      "tbl        v2.16b, {v2.16b}, v3.16b       \n"  // same permutation for lane 2
+      "st3        {v0.16b, v1.16b, v2.16b}, [%1], #48 \n"  // dst += 48; re-interleaves the 3 lanes
+      "b.gt       1b                             \n"  // repeat while width > 0. NOTE(review): presumably width is a positive multiple of 16 - confirm against callers
+      : "+r"(src_rgb24),      // %0
+        "+r"(dst_rgb24),      // %1
+        "+r"(width)           // %2
+      : "r"((ptrdiff_t)-48),  // %3 post-index step: walk the source backwards
+        "r"(&kShuffleMirror)  // %4 address of the tbl index table
+      : "cc", "memory", "v0", "v1", "v2", "v3");
+}

 void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
@ -2185,7 +2211,8 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
      "umull      v0.8h, v0.8b, v4.8b            \n"  // B
      "umlal      v0.8h, v1.8b, v5.8b            \n"  // G
      "umlal      v0.8h, v2.8b, v6.8b            \n"  // R
-      "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 cache lines ahead
+      "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 cache lines
+                                                      // ahead
      "uqrshrn    v0.8b, v0.8h, #8               \n"  // 16 bit to 8 bit Y
      "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
      "prfm       pldl1keep, [%0, 448]           \n"  // prefetch 7 lines ahead
@ -3089,9 +3116,7 @@ void GaussCol_F32_NEON(const float* src0,
 }

 // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
-void GaussRow_F32_NEON(const float* src,
-                       float* dst,
-                       int width) {
+void GaussRow_F32_NEON(const float* src, float* dst, int width) {
  asm volatile(
      "ld3r       {v6.4s, v7.4s, v8.4s}, [%3]    \n"  // constants 4, 6, 1/256

--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@ -1245,6 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
 TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
 TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
 TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
+TESTATOB(RGB24, 3, 3, 1, RGB24Mirror, 3, 3, 1, 0)
 TESTATOB(RAW, 3, 3, 1, J400, 1, 1, 1, 0)
 #ifdef INTEL_TEST
 TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@ -574,13 +574,15 @@ TEST_FACTOR(3, 1, 3, 0)
                              disable_cpu_flags_, benchmark_cpu_info_);       \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
-  TEST_F(LibYUVScaleTest, DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest,                                                     \
+         DISABLED_##I420##name##To##width##x##height##_##filter##_16) {       \
    int diff = I420TestFilter_16(                                             \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
-  TEST_F(LibYUVScaleTest, DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
+  TEST_F(LibYUVScaleTest,                                                     \
+         DISABLED_##I444##name##To##width##x##height##_##filter##_16) {       \
    int diff = I444TestFilter_16(                                             \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \