clang-tidy applied

Bug: libyuv:886, libyuv:889
Change-Id: I2d14d03c19402381256d3c6d988e0b7307bdffd8
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2800147
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Authored and committed by Frank Barchard on 2021-04-01 14:20:35 -07:00
parent 34bf48e160
commit 60db98b6fa
15 changed files with 958 additions and 1006 deletions


@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1784
Version: 1785
License: BSD
License File: LICENSE


@ -767,7 +767,7 @@ struct YuvConstants {
#else
// This struct is for Intel color conversion.
struct YuvConstants {
#if LIBYUV_UNLIMITED_DATA
#if defined(LIBYUV_UNLIMITED_DATA)
uint8_t kUVToB[32];
uint8_t kUVToG[32];
uint8_t kUVToR[32];
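The switch above from #if LIBYUV_UNLIMITED_DATA to #if defined(LIBYUV_UNLIMITED_DATA) is not purely cosmetic: defined() tests only whether the macro exists, while the bare form evaluates its value. A minimal illustration:

#define LIBYUV_UNLIMITED_DATA          // defined, but with no value
#if defined(LIBYUV_UNLIMITED_DATA)     // true: the macro exists
#endif
// #if LIBYUV_UNLIMITED_DATA           // would not compile: empty expression
// With -DLIBYUV_UNLIMITED_DATA (implicitly =1) both forms agree, but a
// value of 0 makes them disagree: defined() remains true.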
@ -1063,11 +1063,11 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
@ -1262,16 +1262,16 @@ void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
void ARGBToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RAWToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGB24ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGB24ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RAWToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RAWToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
@ -1373,42 +1373,42 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
uint8_t* dst_v,
int width);
void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -1417,7 +1417,7 @@ void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v,
int width);
void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -1440,47 +1440,47 @@ void ARGBToUVRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -1689,7 +1689,7 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
@ -1705,9 +1705,13 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void RGB24MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void RGB24MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width);
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width);
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width);
void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@ -1928,23 +1932,23 @@ void MergeARGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_a,
uint8_t* dst_argb,
int width);
void MergeARGBRow_Any_SSE2(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
const uint8_t* src_a,
uint8_t* dst_argb,
void MergeARGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
const uint8_t* a_buf,
uint8_t* dst_ptr,
int width);
void MergeARGBRow_Any_AVX2(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
const uint8_t* src_a,
uint8_t* dst_argb,
void MergeARGBRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
const uint8_t* a_buf,
uint8_t* dst_ptr,
int width);
void MergeARGBRow_Any_NEON(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
const uint8_t* src_a,
uint8_t* dst_argb,
void MergeARGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
const uint8_t* a_buf,
uint8_t* dst_ptr,
int width);
void SplitARGBRow_C(const uint8_t* src_argb,
uint8_t* dst_r,
@ -1970,31 +1974,31 @@ void SplitARGBRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
void SplitARGBRow_NEON(const uint8_t* src_argb,
void SplitARGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
void SplitARGBRow_Any_SSE2(const uint8_t* src_argb,
void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
void SplitARGBRow_Any_SSSE3(const uint8_t* src_argb,
void SplitARGBRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
void SplitARGBRow_Any_AVX2(const uint8_t* src_argb,
void SplitARGBRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
void SplitARGBRow_Any_NEON(const uint8_t* src_argb,
void SplitARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
@ -2020,20 +2024,20 @@ void MergeXRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b,
uint8_t* dst_argb,
int width);
void MergeXRGBRow_Any_SSE2(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
uint8_t* dst_argb,
void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void MergeXRGBRow_Any_AVX2(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
uint8_t* dst_argb,
void MergeXRGBRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void MergeXRGBRow_Any_NEON(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
uint8_t* dst_argb,
void MergeXRGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void SplitXRGBRow_C(const uint8_t* src_argb,
uint8_t* dst_r,
@ -2055,27 +2059,27 @@ void SplitXRGBRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitXRGBRow_NEON(const uint8_t* src_argb,
void SplitXRGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitXRGBRow_Any_SSE2(const uint8_t* src_argb,
void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitXRGBRow_Any_SSSE3(const uint8_t* src_argb,
void SplitXRGBRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitXRGBRow_Any_AVX2(const uint8_t* src_argb,
void SplitXRGBRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitXRGBRow_Any_NEON(const uint8_t* src_argb,
void SplitXRGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
@ -2183,74 +2187,74 @@ void MergeXRGB16To8Row_NEON(const uint16_t* src_r,
uint8_t* dst_argb,
int depth,
int width);
void MergeXR30Row_Any_AVX2(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint8_t* dst_ar30,
void MergeXR30Row_Any_AVX2(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint8_t* dst_ptr,
int depth,
int width);
void MergeAR64Row_Any_AVX2(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
const uint16_t* src_a,
uint16_t* dst_ar64,
void MergeAR64Row_Any_AVX2(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
const uint16_t* a_buf,
uint16_t* dst_ptr,
int depth,
int width);
void MergeXR64Row_Any_AVX2(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint16_t* dst_ar64,
void MergeXR64Row_Any_AVX2(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint16_t* dst_ptr,
int depth,
int width);
void MergeARGB16To8Row_Any_AVX2(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
const uint16_t* src_a,
uint8_t* dst_argb,
void MergeARGB16To8Row_Any_AVX2(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
const uint16_t* a_buf,
uint8_t* dst_ptr,
int depth,
int width);
void MergeXRGB16To8Row_Any_AVX2(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint8_t* dst_argb,
void MergeXRGB16To8Row_Any_AVX2(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint8_t* dst_ptr,
int depth,
int width);
void MergeXR30Row_Any_NEON(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint8_t* dst_ar30,
void MergeXR30Row_Any_NEON(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint8_t* dst_ptr,
int depth,
int width);
void MergeXR30Row_10_Any_NEON(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint8_t* dst_ar30,
void MergeXR30Row_10_Any_NEON(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint8_t* dst_ptr,
int depth,
int width);
void MergeAR64Row_Any_NEON(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
const uint16_t* src_a,
uint16_t* dst_ar64,
void MergeAR64Row_Any_NEON(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
const uint16_t* a_buf,
uint16_t* dst_ptr,
int depth,
int width);
void MergeARGB16To8Row_Any_NEON(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
const uint16_t* src_a,
uint8_t* dst_argb,
void MergeARGB16To8Row_Any_NEON(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
const uint16_t* a_buf,
uint8_t* dst_ptr,
int depth,
int width);
void MergeXR64Row_Any_NEON(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint16_t* dst_ar64,
void MergeXR64Row_Any_NEON(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint16_t* dst_ptr,
int depth,
int width);
void MergeXRGB16To8Row_Any_NEON(const uint16_t* src_r,
const uint16_t* src_g,
const uint16_t* src_b,
uint8_t* dst_argb,
void MergeXRGB16To8Row_Any_NEON(const uint16_t* r_buf,
const uint16_t* g_buf,
const uint16_t* b_buf,
uint8_t* dst_ptr,
int depth,
int width);
@ -2314,16 +2318,16 @@ void MultiplyRow_16_AVX2(const uint16_t* src_y,
uint16_t* dst_y,
int scale,
int width);
void MultiplyRow_16_Any_AVX2(const uint16_t* src_y,
uint16_t* dst_y,
void MultiplyRow_16_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int scale,
int width);
void MultiplyRow_16_NEON(const uint16_t* src_y,
uint16_t* dst_y,
int scale,
int width);
void MultiplyRow_16_Any_NEON(const uint16_t* src_y,
uint16_t* dst_y,
void MultiplyRow_16_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int scale,
int width);
@ -2335,16 +2339,16 @@ void DivideRow_16_AVX2(const uint16_t* src_y,
uint16_t* dst_y,
int scale,
int width);
void DivideRow_16_Any_AVX2(const uint16_t* src_y,
uint16_t* dst_y,
void DivideRow_16_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int scale,
int width);
void DivideRow_16_NEON(const uint16_t* src_y,
uint16_t* dst_y,
int scale,
int width);
void DivideRow_16_Any_NEON(const uint16_t* src_y,
uint16_t* dst_y,
void DivideRow_16_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int scale,
int width);
@ -3719,15 +3723,15 @@ void I400ToARGBRow_MMI(const uint8_t* src_y,
int width);
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
const struct YuvConstants* param,
int width);
void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
const struct YuvConstants* param,
int width);
void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
const struct YuvConstants* param,
int width);
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
@ -3739,11 +3743,11 @@ void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr,
int width);
// ARGB preattenuated alpha blend.
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
void ARGBBlendRow_SSSE3(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBBlendRow_NEON(const uint8_t* src_argb0,
void ARGBBlendRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3755,7 +3759,7 @@ void ARGBBlendRow_MMI(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBBlendRow_C(const uint8_t* src_argb0,
void ARGBBlendRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3799,11 +3803,11 @@ void BlendPlaneRow_C(const uint8_t* src0,
// ARGB multiply images. Same API as Blend, but these require
// pointer and width alignment for SSE2.
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3811,7 +3815,7 @@ void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3819,7 +3823,7 @@ void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
void ARGBMultiplyRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3845,11 +3849,11 @@ void ARGBMultiplyRow_Any_MMI(const uint8_t* y_buf,
int width);
// ARGB add images.
void ARGBAddRow_C(const uint8_t* src_argb0,
void ARGBAddRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBAddRow_SSE2(const uint8_t* src_argb0,
void ARGBAddRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3857,7 +3861,7 @@ void ARGBAddRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBAddRow_AVX2(const uint8_t* src_argb0,
void ARGBAddRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3865,7 +3869,7 @@ void ARGBAddRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBAddRow_NEON(const uint8_t* src_argb0,
void ARGBAddRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3892,11 +3896,11 @@ void ARGBAddRow_Any_MMI(const uint8_t* y_buf,
// ARGB subtract images. Same API as Blend, but these require
// pointer and width alignment for SSE2.
void ARGBSubtractRow_C(const uint8_t* src_argb0,
void ARGBSubtractRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3904,7 +3908,7 @@ void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -3912,7 +3916,7 @@ void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
void ARGBSubtractRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
@ -4119,9 +4123,9 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
void NV21ToYUV24Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
@ -4323,7 +4327,7 @@ void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
int width);
void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -4333,7 +4337,7 @@ void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
int width);
void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -4343,7 +4347,7 @@ void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
int width);
void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -4444,7 +4448,7 @@ void UYVYToUV422Row_C(const uint8_t* src_uyvy,
int width);
void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -4454,7 +4458,7 @@ void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr,
int width);
void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -4464,7 +4468,7 @@ void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr,
int width);
void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
@ -4501,29 +4505,29 @@ void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_C(const uint8_t* src_ayuv,
int stride_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width);
void AYUVToVURow_C(const uint8_t* src_ayuv,
int stride_ayuv,
int src_stride_ayuv,
uint8_t* dst_vu,
int width);
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width);
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
int src_stride_ayuv,
uint8_t* dst_vu,
int width);
void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_uv,
void AYUVToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void AYUVToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_vu,
int width);
void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
void AYUVToVURow_Any_NEON(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_vu,
int width);


@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1784
#define LIBYUV_VERSION 1785
#endif // INCLUDE_LIBYUV_VERSION_H_


@ -17,36 +17,6 @@ namespace libyuv {
extern "C" {
#endif
#if ORIGINAL_OPT
uint32_t HammingDistance_C1(const uint8_t* src_a,
const uint8_t* src_b,
int count) {
uint32_t diff = 0u;
int i;
for (i = 0; i < count; ++i) {
int x = src_a[i] ^ src_b[i];
if (x & 1)
++diff;
if (x & 2)
++diff;
if (x & 4)
++diff;
if (x & 8)
++diff;
if (x & 16)
++diff;
if (x & 32)
++diff;
if (x & 64)
++diff;
if (x & 128)
++diff;
}
return diff;
}
#endif
// Hakmem method for Hamming distance.
uint32_t HammingDistance_C(const uint8_t* src_a,
const uint8_t* src_b,

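The "Hakmem method" named above counts set bits arithmetically instead of testing each of the eight bits the way the deleted ORIGINAL_OPT loop did. A minimal sketch of the idea, assuming the XOR of the inputs is accumulated in 32-bit chunks (this is the classic SWAR bit count, not necessarily the exact libyuv formulation):

#include <stdint.h>

// Count set bits in one 32-bit word of XOR differences.
static uint32_t Popcount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555);                 // 2-bit partial sums
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  // 4-bit partial sums
  x = (x + (x >> 4)) & 0x0F0F0F0F;                 // 8-bit partial sums
  return (x * 0x01010101) >> 24;                   // total lands in top byte
}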

@ -212,11 +212,23 @@ ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
#endif
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
ANY41PT(MergeARGB16To8Row_Any_AVX2, MergeARGB16To8Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
ANY41PT(MergeARGB16To8Row_Any_AVX2,
MergeARGB16To8Row_AVX2,
uint16_t,
2,
uint8_t,
4,
15)
#endif
#ifdef HAS_MERGEARGB16TO8ROW_NEON
ANY41PT(MergeARGB16To8Row_Any_NEON, MergeARGB16To8Row_NEON, uint16_t, 2, uint8_t, 4, 7)
ANY41PT(MergeARGB16To8Row_Any_NEON,
MergeARGB16To8Row_NEON,
uint16_t,
2,
uint8_t,
4,
7)
#endif
#undef ANY41PT
@ -487,7 +499,13 @@ ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#ifdef HAS_MERGEXR30ROW_NEON
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
ANY31PT(MergeXR30Row_10_Any_NEON, MergeXR30Row_10_NEON, uint16_t, 2, uint8_t, 4, 3)
ANY31PT(MergeXR30Row_10_Any_NEON,
MergeXR30Row_10_NEON,
uint16_t,
2,
uint8_t,
4,
3)
#endif
#ifdef HAS_MERGEXR64ROW_AVX2
@ -499,11 +517,23 @@ ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
#endif
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
ANY31PT(MergeXRGB16To8Row_Any_AVX2, MergeXRGB16To8Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
ANY31PT(MergeXRGB16To8Row_Any_AVX2,
MergeXRGB16To8Row_AVX2,
uint16_t,
2,
uint8_t,
4,
15)
#endif
#ifdef HAS_MERGEXRGB16TO8ROW_NEON
ANY31PT(MergeXRGB16To8Row_Any_NEON, MergeXRGB16To8Row_NEON, uint16_t, 2, uint8_t, 4, 7)
ANY31PT(MergeXRGB16To8Row_Any_NEON,
MergeXRGB16To8Row_NEON,
uint16_t,
2,
uint8_t,
4,
7)
#endif
#undef ANY31PT
@ -1553,20 +1583,20 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#undef ANY11C
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
} \
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \
int width, int source_y_fraction) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
} \
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
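The wrapper's width split is easiest to see with concrete numbers. A worked example, assuming MASK = 31 (a 32-pixel SIMD kernel such as the AVX2 interpolator):

int width = 100;
int r = width & 31;   // r = 4: ragged tail
int n = width & ~31;  // n = 96: largest multiple of 32
// The SIMD kernel handles pixels [0, 96). The 4 leftover pixels are
// copied into the aligned temp buffer, run as one more full 32-pixel
// block, and the first 4 result pixels are copied back out.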
#ifdef HAS_INTERPOLATEROW_AVX2
@ -1844,17 +1874,17 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 4]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
@ -2001,17 +2031,17 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu, \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \
int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n); \
ANY_SIMD(src_ptr, src_stride, dst_vu, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \

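In the ANY12S and ANY11S helpers above, SS() rounds a pixel count up after the subsample shift, and odd widths repeat the last pixel so the 2x2 chroma average always has a full pair. For reference, SS is defined earlier in this file as:

#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

// e.g. r = 7 leftover YUY2 pixels with UVSHIFT = 1:
// SS(7, 1) = (7 + 1) >> 1 = 4 macropixels' worth of bytes are copied.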

@ -553,80 +553,80 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
// Intel version mimics SSE/AVX, which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
} \
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2, then multiplies by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
} \
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = src_rgb[B] + src_rgb1[B]; \
uint16_t ag = src_rgb[G] + src_rgb1[G]; \
uint16_t ar = src_rgb[R] + src_rgb1[R]; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
} \
}
#endif
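The two strategies compute the same 2x2 chroma average up to rounding; the ARM path just defers the final halving into coefficients that are 2x smaller. A self-contained check with made-up sample values:

#include <stdint.h>

static uint8_t avgb(uint8_t a, uint8_t b) {  // pavgb: rounding average
  return (uint8_t)((a + b + 1) >> 1);
}

int main(void) {
  uint8_t tl = 250, tr = 252, bl = 253, br = 255;  // one channel, 2x2 block
  // Intel style: two rounding-average passes -> 253 (true mean is 252.5).
  uint8_t intel = avgb(avgb(tl, bl), avgb(tr, br));
  // ARM style: rounded sum / 2 -> 505 == 2 * 252.5, held in 16 bits; the
  // RGB2xTo* helpers then apply coefficients scaled down by 2.
  uint16_t arm2x = (uint16_t)((tl + tr + bl + br + 1) >> 1);
  return (intel == 253 && arm2x == 505) ? 0 : 1;
}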
@ -694,80 +694,80 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimics SSE/AVX, which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
} \
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2, then multiplies by 2x smaller coefficients
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
} \
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
} \
}
#endif
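The J variants target full-range (JPEG) YUV, so the luma helpers drop the +16 offset of the limited-range path. A hedged reference for the BT.601 full-range luma (coefficients sum to 256; a sketch, not necessarily the exact libyuv code):

#include <stdint.h>

static uint8_t RGBToYJ_Sketch(uint8_t r, uint8_t g, uint8_t b) {
  // 0.299, 0.587, 0.114 scaled by 256, round to nearest.
  return (uint8_t)((77 * r + 150 * g + 29 * b + 128) >> 8);
}
// RGBToYJ_Sketch(255, 255, 255) == 255 and RGBToYJ_Sketch(0, 0, 0) == 0,
// i.e. the full 0..255 output range is reachable.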
@ -1237,16 +1237,16 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v* f >> 16
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
const uint32_t b = REPEAT8(src_argb0[0]);
const uint32_t g = REPEAT8(src_argb0[1]);
const uint32_t r = REPEAT8(src_argb0[2]);
const uint32_t a = REPEAT8(src_argb0[3]);
const uint32_t b = REPEAT8(src_argb[0]);
const uint32_t g = REPEAT8(src_argb[1]);
const uint32_t r = REPEAT8(src_argb[2]);
const uint32_t a = REPEAT8(src_argb[3]);
const uint32_t b_scale = src_argb1[0];
const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2];
@ -1255,7 +1255,7 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
dst_argb[3] = SHADE(a, a_scale);
src_argb0 += 4;
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
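REPEAT8 widens a byte to 16 bits by replication (v * 257), so SHADE's >> 16 approximates v * f / 255 to within one LSB. A hypothetical standalone check (the helper name is invented for illustration):

#include <stdint.h>

static uint8_t Mul8Approx(uint8_t v, uint8_t scale) {
  uint32_t rep = (uint32_t)v | ((uint32_t)v << 8);  // REPEAT8: v * 257
  return (uint8_t)((rep * scale) >> 16);            // SHADE
}

int main(void) {
  // Exact v * scale / 255 would give 128 and 255; the shortcut is 1 low.
  return (Mul8Approx(255, 128) == 127 && Mul8Approx(255, 255) == 254) ? 0 : 1;
}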
@ -1265,16 +1265,16 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
#define SHADE(f, v) clamp255(v + f)
void ARGBAddRow_C(const uint8_t* src_argb0,
void ARGBAddRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
const int b = src_argb0[0];
const int g = src_argb0[1];
const int r = src_argb0[2];
const int a = src_argb0[3];
const int b = src_argb[0];
const int g = src_argb[1];
const int r = src_argb[2];
const int a = src_argb[3];
const int b_add = src_argb1[0];
const int g_add = src_argb1[1];
const int r_add = src_argb1[2];
@ -1283,7 +1283,7 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
dst_argb[1] = SHADE(g, g_add);
dst_argb[2] = SHADE(r, r_add);
dst_argb[3] = SHADE(a, a_add);
src_argb0 += 4;
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
@ -1292,16 +1292,16 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
#define SHADE(f, v) clamp0(f - v)
void ARGBSubtractRow_C(const uint8_t* src_argb0,
void ARGBSubtractRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int i;
for (i = 0; i < width; ++i) {
const int b = src_argb0[0];
const int g = src_argb0[1];
const int r = src_argb0[2];
const int a = src_argb0[3];
const int b = src_argb[0];
const int g = src_argb[1];
const int r = src_argb[2];
const int a = src_argb[3];
const int b_sub = src_argb1[0];
const int g_sub = src_argb1[1];
const int r_sub = src_argb1[2];
@ -1310,7 +1310,7 @@ void ARGBSubtractRow_C(const uint8_t* src_argb0,
dst_argb[1] = SHADE(g, g_sub);
dst_argb[2] = SHADE(r, r_sub);
dst_argb[3] = SHADE(a, a_sub);
src_argb0 += 4;
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
}
@ -1486,7 +1486,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA)
#if LIBYUV_UNLIMITED_DATA
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
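Where 2.018 comes from, as a hedged derivation for BT.601 with studio-swing chroma:

  B - Y = 2 * (1 - KB) * Cb = 2 * 0.886 * Cb = 1.772 * Cb   (full range)
  1.772 * 255 / 224 = 2.0172...                             (16..240 chroma rescaled to 255)
  round(2.0172 * 64) = 129                                  (6-bit fixed point)

129 no longer fits the signed-byte math the default SIMD paths assume (an inference from the surrounding clamp comments), hence the cap at 128 unless LIBYUV_UNLIMITED_DATA is set.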
@ -1540,7 +1540,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA)
#if LIBYUV_UNLIMITED_DATA
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
@ -1594,7 +1594,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// KR = 0.2627; KB = 0.0593
// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA)
#if LIBYUV_UNLIMITED_DATA
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
@ -1646,7 +1646,7 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#undef MAKEYUVCONSTANTS
#if defined(LIBYUV_UNLIMITED_DATA)
#if LIBYUV_UNLIMITED_DATA
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
@ -3347,19 +3347,19 @@ void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// Blend src_argb over src_argb1 and store to dst_argb.
// dst_argb may be src_argb or src_argb1.
// This code mimics the SSSE3 version for better testability.
void ARGBBlendRow_C(const uint8_t* src_argb0,
void ARGBBlendRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint32_t fb = src_argb0[0];
uint32_t fg = src_argb0[1];
uint32_t fr = src_argb0[2];
uint32_t a = src_argb0[3];
uint32_t fb = src_argb[0];
uint32_t fg = src_argb[1];
uint32_t fr = src_argb[2];
uint32_t a = src_argb[3];
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
@ -3368,10 +3368,10 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
dst_argb[2] = BLEND(fr, br, a);
dst_argb[3] = 255u;
fb = src_argb0[4 + 0];
fg = src_argb0[4 + 1];
fr = src_argb0[4 + 2];
a = src_argb0[4 + 3];
fb = src_argb[4 + 0];
fg = src_argb[4 + 1];
fr = src_argb[4 + 2];
a = src_argb[4 + 3];
bb = src_argb1[4 + 0];
bg = src_argb1[4 + 1];
br = src_argb1[4 + 2];
@ -3379,16 +3379,16 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
dst_argb[4 + 1] = BLEND(fg, bg, a);
dst_argb[4 + 2] = BLEND(fr, br, a);
dst_argb[4 + 3] = 255u;
src_argb0 += 8;
src_argb += 8;
src_argb1 += 8;
dst_argb += 8;
}
if (width & 1) {
uint32_t fb = src_argb0[0];
uint32_t fg = src_argb0[1];
uint32_t fr = src_argb0[2];
uint32_t a = src_argb0[3];
uint32_t fb = src_argb[0];
uint32_t fg = src_argb[1];
uint32_t fr = src_argb[2];
uint32_t a = src_argb[3];
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];

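The BLEND macro above is the premultiplied-alpha "over" operator: dst = fg + ((256 - a) * bg >> 8), clamped to 255. A quick self-contained check with made-up values:

#include <stdint.h>

static uint8_t Blend1(uint8_t f, uint8_t b, uint8_t a) {
  uint32_t v = (((256u - a) * b) >> 8) + f;  // BLEND(f, b, a)
  return (uint8_t)(v > 255 ? 255 : v);
}

int main(void) {
  // Opaque foreground replaces the background; a fully transparent
  // foreground (premultiplied, so f == 0) keeps the background.
  return (Blend1(100, 200, 255) == 100 && Blend1(0, 200, 0) == 200) ? 0 : 1;
}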

@ -1160,7 +1160,7 @@ void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
: "memory", "cc", "xmm0", "xmm1");
}
void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
@ -1178,7 +1178,7 @@ void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
"lea 0x10(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_ar64), // %0
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
@ -1267,7 +1267,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
#endif
#ifdef HAS_AB64TOARGBROW_AVX2
void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
@ -1286,7 +1286,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_ar64), // %0
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
@ -1506,7 +1506,7 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
#endif // HAS_RGBATOYJROW_AVX2
#ifdef HAS_ARGBTOUVROW_SSSE3
void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1558,7 +1558,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
"lea 0x8(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -1575,7 +1575,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
static const lvec8 kShufARGBToUV_AVX = {
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1623,7 +1623,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
"sub $0x20,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -1638,7 +1638,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ABGRTOUVROW_AVX2
void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1686,7 +1686,7 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
"sub $0x20,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_abgr0), // %0
: "+r"(src_abgr), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -1701,7 +1701,7 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
#endif // HAS_ABGRTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJROW_AVX2
void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1750,7 +1750,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
"sub $0x20,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -1765,7 +1765,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
#endif // HAS_ARGBTOUVJROW_AVX2
#ifdef HAS_ARGBTOUVJROW_SSSE3
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1818,7 +1818,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
"lea 0x8(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -1905,7 +1905,7 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
"xmm7");
}
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1957,7 +1957,7 @@ void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
"lea 0x8(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_bgra0), // %0
: "+r"(src_bgra), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -2002,7 +2002,7 @@ void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
"xmm7");
}
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
@ -2054,7 +2054,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
"lea 0x8(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_abgr0), // %0
: "+r"(src_abgr), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -2065,7 +2065,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
}
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_u,
uint8_t* dst_v,
@ -2117,7 +2117,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
"lea 0x8(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_rgba0), // %0
: "+r"(src_rgba), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
@ -5741,7 +5741,7 @@ void MergeXR30Row_AVX2(const uint16_t* src_r,
#if defined(__i386__)
: "m"(shift) // %5
#else
: "rm"(shift) // %5
: "rm"(shift) // %5
#endif
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
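On the "m" versus "rm" constraints above: "rm" lets the compiler place the operand in a register or in memory, while plain "m" forces memory. Forcing memory on i386 presumably sidesteps register exhaustion (PIC reserves %ebx and these kernels already pin most general registers); treat that rationale as an assumption. A minimal sketch of the two constraint forms:

static int AddOne(int x) {
  int r;
  asm("movl %1, %0\n\t"
      "addl $1, %0"
      : "=r"(r)    // output: any general register
      : "rm"(x));  // input: register or memory, compiler's choice
  return r;
}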
@ -5813,9 +5813,9 @@ void MergeAR64Row_AVX2(const uint16_t* src_r,
: "m"(shift), // %6
"m"(mask), // %7
#else
"+rm"(width) // %5
: "rm"(shift), // %6
"rm"(mask), // %7
"+rm"(width) // %5
: "rm"(shift), // %6
"rm"(mask), // %7
#endif
"m"(MergeAR64Permute) // %8
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
@ -5882,8 +5882,8 @@ void MergeXR64Row_AVX2(const uint16_t* src_r,
: "m"(shift), // %5
"m"(mask), // %6
#else
: "rm"(shift), // %5
"rm"(mask), // %6
: "rm"(shift), // %5
"rm"(mask), // %6
#endif
"m"(MergeAR64Permute) // %7
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
@ -5944,8 +5944,8 @@ void MergeARGB16To8Row_AVX2(const uint16_t* src_r,
"+m"(width) // %5
: "m"(shift), // %6
#else
"+rm"(width) // %5
: "rm"(shift), // %6
"+rm"(width) // %5
: "rm"(shift), // %6
#endif
"m"(MergeARGB16To8Shuffle) // %7
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
@ -6000,7 +6000,7 @@ void MergeXRGB16To8Row_AVX2(const uint16_t* src_r,
#if defined(__i386__)
: "m"(shift), // %5
#else
: "rm"(shift), // %5
: "rm"(shift), // %5
#endif
"m"(MergeARGB16To8Shuffle) // %6
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
@ -6732,7 +6732,7 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
// Blend 8 pixels at a time
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
void ARGBBlendRow_SSSE3(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -6803,7 +6803,7 @@ void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
"sub $0x1,%3 \n"
"jge 91b \n"
"99: \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -7405,7 +7405,7 @@ void ARGBShadeRow_SSE2(const uint8_t* src_argb,
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -7433,7 +7433,7 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
"lea 0x10(%2),%2 \n"
"sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -7444,7 +7444,7 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
#ifdef HAS_ARGBMULTIPLYROW_AVX2
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -7471,7 +7471,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
"sub $0x8,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -7482,7 +7482,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
void ARGBAddRow_SSE2(const uint8_t* src_argb0,
void ARGBAddRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -7499,7 +7499,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb0,
"lea 0x10(%2),%2 \n"
"sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -7510,7 +7510,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb0,
#ifdef HAS_ARGBADDROW_AVX2
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
void ARGBAddRow_AVX2(const uint8_t* src_argb0,
void ARGBAddRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -7527,7 +7527,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb0,
"sub $0x8,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -7538,7 +7538,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb0,
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -7555,7 +7555,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
"lea 0x10(%2),%2 \n"
"sub $0x4,%3 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -7566,7 +7566,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
#ifdef HAS_ARGBSUBTRACTROW_AVX2
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -7583,7 +7583,7 @@ void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
"sub $0x8,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3

File diff suppressed because it is too large


@ -781,7 +781,7 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
}
}
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void ARGBToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@ -792,10 +792,10 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
@ -822,18 +822,18 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void ARGBToUVRow_MSA(const uint8_t* src_argb0,
void ARGBToUVRow_MSA(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* src_argb0_next = src_argb0 + src_stride_argb;
const uint8_t* src_argb_next = src_argb + src_stride_argb;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
@ -847,14 +847,14 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
for (x = 0; x < width; x += 32) {
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb, 112);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
@ -875,14 +875,14 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
reg3 = __msa_hadd_u_h(vec5, vec5);
reg4 = __msa_hadd_u_h(vec0, vec0);
reg5 = __msa_hadd_u_h(vec1, vec1);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 0);
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 16);
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 32);
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 48);
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 64);
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 80);
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 96);
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 112);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
@ -945,8 +945,8 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
ST_UB(dst0, dst_u);
ST_UB(dst1, dst_v);
src_argb0 += 128;
src_argb0_next += 128;
src_argb += 128;
src_argb_next += 128;
dst_u += 16;
dst_v += 16;
}
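For orientation, the kernel above implements the usual two-row UV path: average each 2x2 block of ARGB pixels across the row pair, then apply the BT.601 studio-range U/V weights with a packed bias. A scalar sketch follows; the constants match libyuv's C reference as best recalled, so treat the exact rounding as an assumption:

#include <stdint.h>

// Scalar sketch of ARGBToUVRow: subsample 2x2 with rounding, then
// BT.601 U/V. 0x8080 packs the +128 chroma bias plus 0.5 for rounding.
// libyuv ARGB memory order is B, G, R, A.
static void ARGBToUVRow_Scalar(const uint8_t* src_argb,
                               int src_stride_argb,
                               uint8_t* dst_u,
                               uint8_t* dst_v,
                               int width) {
  const uint8_t* next = src_argb + src_stride_argb;
  for (int x = 0; x < width; x += 2) {
    int b = (src_argb[0] + src_argb[4] + next[0] + next[4] + 2) >> 2;
    int g = (src_argb[1] + src_argb[5] + next[1] + next[5] + 2) >> 2;
    int r = (src_argb[2] + src_argb[6] + next[2] + next[6] + 2) >> 2;
    *dst_u++ = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
    *dst_v++ = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
    src_argb += 8;
    next += 8;
  }
}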
@ -1173,7 +1173,7 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb,
}
}
void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
void ARGBMultiplyRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -1184,7 +1184,7 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
v8i16 zero = {0};
for (x = 0; x < width; x += 4) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
@ -1206,13 +1206,13 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
ST_UB(dst0, dst_argb);
src_argb0 += 16;
src_argb += 16;
src_argb1 += 16;
dst_argb += 16;
}
}
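The self-interleaves here (__msa_ilvr_b(src0, src0)) are the same trick the SSE2 path plays with punpcklbw x, x followed by pmulhuw: duplicating a byte into both halves of a 16-bit lane multiplies it by 257 (0x0101), so the high 16 bits of the product approximate division by 255. In scalar form (a sketch, not libyuv's C reference):

#include <stdint.h>

// One channel of ARGBMultiplyRow: (a * 0x0101 * b) >> 16 approximates
// (a * b) / 255; the fast form can come out one LSB low versus exact
// division (e.g. 255 * 255 yields 254).
static inline uint8_t MultiplyChannel(uint8_t a, uint8_t b) {
  return (uint8_t)(((uint32_t)a * 0x0101u * b) >> 16);
}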
void ARGBAddRow_MSA(const uint8_t* src_argb0,
void ARGBAddRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -1220,20 +1220,20 @@ void ARGBAddRow_MSA(const uint8_t* src_argb0,
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 8) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
dst0 = __msa_adds_u_b(src0, src2);
dst1 = __msa_adds_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_argb, 16);
src_argb0 += 32;
src_argb += 32;
src_argb1 += 32;
dst_argb += 32;
}
}
void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
void ARGBSubtractRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -1241,14 +1241,14 @@ void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 8) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
dst0 = __msa_subs_u_b(src0, src2);
dst1 = __msa_subs_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_argb, 16);
src_argb0 += 32;
src_argb += 32;
src_argb1 += 32;
dst_argb += 32;
}
@ -1794,7 +1794,7 @@ void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
}
}
void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
v8u16 vec0, vec1, vec2, vec3;
@ -1809,9 +1809,9 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
@ -1830,12 +1830,12 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
ST_UB(dst0, dst_y);
src_argb0 += 48;
src_argb += 48;
dst_y += 16;
}
}
void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
v8u16 vec0, vec1, vec2, vec3;
@ -1850,9 +1850,9 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
@ -1871,7 +1871,7 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
ST_UB(dst0, dst_y);
src_argb0 += 48;
src_argb += 48;
dst_y += 16;
}
}
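Both RGB24ToYRow and RAWToYRow reduce to the BT.601 studio-range luma dot product; the packed const_0x1080 folds the +16 black-level offset and the rounding half together, i.e. (16 << 8) + 128. A one-line scalar sketch of that math:

#include <stdint.h>

// BT.601 studio-range luma: weights sum to 220, so output spans 16..235.
static inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}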
@ -2037,14 +2037,14 @@ void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
}
}
void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
void RGB24ToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
int64_t res0, res1;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
@ -2147,14 +2147,14 @@ void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
}
}
void RAWToUVRow_MSA(const uint8_t* src_rgb0,
void RAWToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
int64_t res0, res1;
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
@ -2446,7 +2446,7 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx,
}
}
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void ARGBToYJRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D);
@ -2454,19 +2454,19 @@ void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200);
@ -2474,19 +2474,19 @@ void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142);
@ -2494,19 +2494,19 @@ void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0;
v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900);
@ -2514,26 +2514,26 @@ void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
src_argb0 += 64;
src_argb += 64;
dst_y += 16;
}
}
void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
v8u16 src0, src1, src2, src3, src4, src5, src6, src7;
v8u16 vec0, vec1, vec2, vec3;
v8u16 dst0, dst1, dst2, dst3;
@ -2658,14 +2658,14 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
}
}
void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
const uint8_t unused = 0xf;
v8u16 src0, src1, src2, src3;
v16u8 dst0, dst1;
@ -2693,14 +2693,14 @@ void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
}
}
void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
void ABGRToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
const uint8_t unused = 0xf;
v8u16 src0, src1, src2, src3;
v16u8 dst0, dst1;
@ -2728,14 +2728,14 @@ void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
}
}
void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
void RGBAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
int x;
const uint8_t* s = src_rgb0;
const uint8_t* t = src_rgb0 + src_stride_rgb;
const uint8_t* s = src_rgb;
const uint8_t* t = src_rgb + src_stride_rgb;
const uint8_t unused = 0xf;
v8u16 src0, src1, src2, src3;
v16u8 dst0, dst1;
@ -3109,7 +3109,7 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
}
}
void ARGBBlendRow_MSA(const uint8_t* src_argb0,
void ARGBBlendRow_MSA(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -3123,8 +3123,8 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
v16i8 zero = {0};
for (x = 0; x < width; x += 8) {
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
@ -3168,7 +3168,7 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
dst0 = __msa_bmnz_v(dst0, const_255, mask);
dst1 = __msa_bmnz_v(dst1, const_255, mask);
ST_UB2(dst0, dst1, dst_argb, 16);
src_argb0 += 32;
src_argb += 32;
src_argb1 += 32;
dst_argb += 32;
}


@ -415,11 +415,11 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READNV12 YUVTORGB
"subs %3, %3, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
"vmov.u8 d23, #255 \n"
"1: \n" READNV12 YUVTORGB
"subs %3, %3, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
@ -438,11 +438,11 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READNV21 YUVTORGB
"subs %3, %3, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
"vmov.u8 d23, #255 \n"
"1: \n" READNV21 YUVTORGB
"subs %3, %3, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_argb), // %2
@ -537,11 +537,11 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READYUY2 YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
"vmov.u8 d23, #255 \n"
"1: \n" READYUY2 YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@ -558,11 +558,11 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READUYVY YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
"vmov.u8 d23, #255 \n"
"1: \n" READUYVY YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@ -1680,7 +1680,7 @@ void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}
void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d24, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d25, #150 \n" // G * 0.5870 coefficient
@ -1694,7 +1694,7 @@ void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
"vqrshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
@ -2655,7 +2655,7 @@ void InterpolateRow_NEON(uint8_t* dst_ptr,
}
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
void ARGBBlendRow_NEON(const uint8_t* src_argb0,
void ARGBBlendRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -2706,7 +2706,7 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
"99: \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -2944,7 +2944,7 @@ void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
}
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
void ARGBMultiplyRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -2964,7 +2964,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
"vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -2973,7 +2973,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
}
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
void ARGBAddRow_NEON(const uint8_t* src_argb0,
void ARGBAddRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -2987,7 +2987,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
"vqadd.u8 q1, q1, q3 \n" // add R, A
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -2996,7 +2996,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
}
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
void ARGBSubtractRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -3010,7 +3010,7 @@ void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
"vqsub.u8 q1, q1, q3 \n" // subtract R, A
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
"bgt 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3


@ -909,7 +909,7 @@ void MergeXR30Row_NEON(const uint16_t* src_r,
"+r"(src_b), // %2
"+r"(dst_ar30), // %3
"+r"(width) // %4
: "r"(shift) // %5
: "r"(shift) // %5
: "memory", "cc", "v0", "v1", "v2", "v30", "v31");
}
@ -1305,10 +1305,10 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
"movi v5.8b, #255 \n" // Alpha
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"orr v3.8b, v1.8b, v1.8b \n" // move g
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"orr v3.8b, v1.8b, v1.8b \n" // move g
"prfm pldl1keep, [%0, 448] \n"
"orr v4.8b, v0.8b, v0.8b \n" // move r
"orr v4.8b, v0.8b, v0.8b \n" // move r
"st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
"b.gt 1b \n"
: "+r"(src_raw), // %0
@ -1324,10 +1324,10 @@ void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
"movi v0.8b, #255 \n" // Alpha
"1: \n"
"ld3 {v3.8b,v4.8b,v5.8b}, [%0], #24 \n" // read r g b
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"orr v2.8b, v4.8b, v4.8b \n" // move g
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"orr v2.8b, v4.8b, v4.8b \n" // move g
"prfm pldl1keep, [%0, 448] \n"
"orr v1.8b, v5.8b, v5.8b \n" // move r
"orr v1.8b, v5.8b, v5.8b \n" // move r
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store a b g r
"b.gt 1b \n"
: "+r"(src_raw), // %0
@ -1377,8 +1377,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
"1: \n"
"ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"prfm pldl1keep, [%0, 448] \n"
RGB565TOARGB
"prfm pldl1keep, [%0, 448] \n" RGB565TOARGB
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_rgb565), // %0
@ -1467,8 +1466,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
"1: \n"
"ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"prfm pldl1keep, [%0, 448] \n"
ARGB4444TOARGB
"prfm pldl1keep, [%0, 448] \n" ARGB4444TOARGB
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb4444), // %0
@ -1485,7 +1483,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
asm volatile(
"1: \n"
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"prfm pldl1keep, [%0, 448] \n"
"st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of
// RGB24
@ -1502,8 +1500,8 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
asm volatile(
"1: \n"
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"orr v4.8b, v2.8b, v2.8b \n" // mov g
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"orr v4.8b, v2.8b, v2.8b \n" // mov g
"prfm pldl1keep, [%0, 448] \n"
"orr v5.8b, v1.8b, v1.8b \n" // mov b
"st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b
@ -1676,7 +1674,7 @@ void I422ToYUY2Row_NEON(const uint8_t* src_y,
asm volatile(
"1: \n"
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
"subs %w4, %w4, #16 \n" // 16 pixels
"subs %w4, %w4, #16 \n" // 16 pixels
"orr v2.8b, v1.8b, v1.8b \n"
"prfm pldl1keep, [%0, 448] \n"
"ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
@ -1724,8 +1722,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
// pixels
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"prfm pldl1keep, [%0, 448] \n"
ARGBTORGB565
"prfm pldl1keep, [%0, 448] \n" ARGBTORGB565
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
"b.gt 1b \n"
: "+r"(src_argb), // %0
@ -1766,8 +1763,7 @@ void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
// pixels
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"prfm pldl1keep, [%0, 448] \n"
ARGBTOARGB1555
"prfm pldl1keep, [%0, 448] \n" ARGBTOARGB1555
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
"b.gt 1b \n"
: "+r"(src_argb), // %0
@ -1787,8 +1783,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
// pixels
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"prfm pldl1keep, [%0, 448] \n"
ARGBTOARGB4444
"prfm pldl1keep, [%0, 448] \n" ARGBTOARGB4444
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
"b.gt 1b \n"
: "+r"(src_argb), // %0
@ -1956,7 +1951,7 @@ void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
}
void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #29 \n" // B * 0.1140 coefficient
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
@ -1971,7 +1966,7 @@ void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
"uqrshrn v3.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y
"st1 {v3.8b}, [%1], #8 \n" // store 8 pixels Y.
"b.gt 1b \n"
: "+r"(src_argb), // %0
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
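The setup constants 29, 150 and 77 are the full-range (JPEG) luma weights 0.1140, 0.5870 and 0.2990 scaled by 256, and uqrshrn #8 contributes the +128 round. Scalar equivalent:

#include <stdint.h>

// Full-range (JPEG) luma as computed by RGBAToYJRow/ARGBToYJRow:
// no +16 offset, and the weights sum to 256 so 255 maps to 255.
static inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((77 * r + 150 * g + 29 * b + 128) >> 8);
}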
@ -2668,8 +2663,8 @@ void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
"movi v7.8b, #16 \n" // Add 16 constant
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // B
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // B
"prfm pldl1keep, [%0, 448] \n"
"umlal v16.8h, v1.8b, v5.8b \n" // G
"umlal v16.8h, v2.8b, v6.8b \n" // R
@ -2692,8 +2687,8 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
"movi v7.8b, #16 \n" // Add 16 constant
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // B
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v16.8h, v0.8b, v4.8b \n" // B
"prfm pldl1keep, [%0, 448] \n"
"umlal v16.8h, v1.8b, v5.8b \n" // G
"umlal v16.8h, v2.8b, v6.8b \n" // R
@ -2715,8 +2710,8 @@ void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
"movi v6.8b, #77 \n" // R * 0.2990 coefficient
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // B
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // B
"prfm pldl1keep, [%0, 448] \n"
"umlal v0.8h, v1.8b, v5.8b \n" // G
"umlal v0.8h, v2.8b, v6.8b \n" // R
@ -2737,8 +2732,8 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
"movi v4.8b, #77 \n" // R * 0.2990 coefficient
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // B
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // B
"prfm pldl1keep, [%0, 448] \n"
"umlal v0.8h, v1.8b, v5.8b \n" // G
"umlal v0.8h, v2.8b, v6.8b \n" // R
@ -2818,7 +2813,7 @@ void InterpolateRow_NEON(uint8_t* dst_ptr,
}
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
void ARGBBlendRow_NEON(const uint8_t* src_argb0,
void ARGBBlendRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -2880,7 +2875,7 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
"99: \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -2900,11 +2895,11 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v3.8b \n" // b * a
"prfm pldl1keep, [%0, 448] \n"
"umull v5.8h, v1.8b, v3.8b \n" // g * a
"umull v6.8h, v2.8b, v3.8b \n" // r * a
"uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
"uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
"uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
"umull v5.8h, v1.8b, v3.8b \n" // g * a
"umull v6.8h, v2.8b, v3.8b \n" // r * a
"uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
"uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
"uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb), // %0
@ -2930,8 +2925,8 @@ void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
// 8 pixel loop.
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB.
"subs %w1, %w1, #8 \n" // 8 processed per loop.
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
"subs %w1, %w1, #8 \n" // 8 processed per loop.
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
"prfm pldl1keep, [%0, 448] \n"
"uxtl v1.8h, v1.8b \n"
"uxtl v2.8h, v2.8b \n"
@ -3040,8 +3035,8 @@ void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
"movi v30.8b, #50 \n" // BR coefficient
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
"subs %w1, %w1, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
"subs %w1, %w1, #8 \n" // 8 processed per loop.
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
"prfm pldl1keep, [%0, 448] \n"
"umlal v4.8h, v1.8b, v21.8b \n" // G
"umlal v4.8h, v2.8b, v22.8b \n" // R
@ -3127,7 +3122,7 @@ void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
void ARGBMultiplyRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -3149,7 +3144,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
"rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -3158,7 +3153,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
}
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
void ARGBAddRow_NEON(const uint8_t* src_argb0,
void ARGBAddRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -3176,7 +3171,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
"uqadd v3.8b, v3.8b, v7.8b \n"
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -3185,7 +3180,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
}
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
void ARGBSubtractRow_NEON(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
@ -3203,7 +3198,7 @@ void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
"uqsub v3.8b, v3.8b, v7.8b \n"
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb0), // %0
: "+r"(src_argb), // %0
"+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
@ -3703,9 +3698,9 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
"1: \n"
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
"prfm pldl1keep, [%0, 448] \n"
"zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values
"zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values
"prfm pldl1keep, [%1, 448] \n"
"subs %w3, %w3, #16 \n" // 16 pixels per loop
"st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n" // store 16 YUV pixels


@ -1427,7 +1427,7 @@ __declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb,
}
}
__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1499,7 +1499,7 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
}
}
__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1573,7 +1573,7 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
}
#ifdef HAS_ARGBTOUVROW_AVX2
__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1641,7 +1641,7 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJROW_AVX2
__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1709,7 +1709,7 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
}
#endif // HAS_ARGBTOUVJROW_AVX2
__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
@ -1767,7 +1767,7 @@ __declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
}
}
__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1839,7 +1839,7 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
}
}
__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1911,7 +1911,7 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
}
}
__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -4347,13 +4347,13 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
// Blend 8 pixels at a time.
__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
@ -4442,7 +4442,7 @@ __declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
@ -4487,7 +4487,7 @@ __declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
@ -4581,7 +4581,7 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
@ -4937,20 +4937,20 @@ __declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb,
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
pxor xmm5, xmm5 // constant 0
convertloop:
movdqu xmm0, [eax] // read 4 pixels from src_argb0
movdqu xmm0, [eax] // read 4 pixels from src_argb
movdqu xmm2, [esi] // read 4 pixels from src_argb1
movdqu xmm1, xmm0
movdqu xmm3, xmm2
@ -4958,8 +4958,8 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
punpckhbw xmm1, xmm1 // next 2
punpcklbw xmm2, xmm5 // first 2
punpckhbw xmm3, xmm5 // next 2
pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2
pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
pmulhuw xmm0, xmm2 // src_argb * src_argb1 first 2
pmulhuw xmm1, xmm3 // src_argb * src_argb1 next 2
lea eax, [eax + 16]
lea esi, [esi + 16]
packuswb xmm0, xmm1
@ -4977,13 +4977,13 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
#ifdef HAS_ARGBADDROW_SSE2
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
// TODO(fbarchard): Port this to posix, neon and other math functions.
__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
@ -4992,11 +4992,11 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
jl convertloop49
convertloop4:
movdqu xmm0, [eax] // read 4 pixels from src_argb0
movdqu xmm0, [eax] // read 4 pixels from src_argb
lea eax, [eax + 16]
movdqu xmm1, [esi] // read 4 pixels from src_argb1
lea esi, [esi + 16]
paddusb xmm0, xmm1 // src_argb0 + src_argb1
paddusb xmm0, xmm1 // src_argb + src_argb1
movdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 4
@ -5007,11 +5007,11 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
jl convertloop19
convertloop1:
movd xmm0, [eax] // read 1 pixels from src_argb0
movd xmm0, [eax] // read 1 pixel from src_argb
lea eax, [eax + 4]
movd xmm1, [esi] // read 1 pixels from src_argb1
lea esi, [esi + 4]
paddusb xmm0, xmm1 // src_argb0 + src_argb1
paddusb xmm0, xmm1 // src_argb + src_argb1
movd [edx], xmm0
lea edx, [edx + 4]
sub ecx, 1
@ -5026,23 +5026,23 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
#ifdef HAS_ARGBSUBTRACTROW_SSE2
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
convertloop:
movdqu xmm0, [eax] // read 4 pixels from src_argb0
movdqu xmm0, [eax] // read 4 pixels from src_argb
lea eax, [eax + 16]
movdqu xmm1, [esi] // read 4 pixels from src_argb1
lea esi, [esi + 16]
psubusb xmm0, xmm1 // src_argb0 - src_argb1
psubusb xmm0, xmm1 // src_argb - src_argb1
movdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 4
@ -5056,20 +5056,20 @@ __declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
#ifdef HAS_ARGBMULTIPLYROW_AVX2
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
vpxor ymm5, ymm5, ymm5 // constant 0
convertloop:
vmovdqu ymm1, [eax] // read 8 pixels from src_argb0
vmovdqu ymm1, [eax] // read 8 pixels from src_argb
lea eax, [eax + 32]
vmovdqu ymm3, [esi] // read 8 pixels from src_argb1
lea esi, [esi + 32]
@ -5077,8 +5077,8 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
vpunpckhbw ymm1, ymm1, ymm1 // high 4
vpunpcklbw ymm2, ymm3, ymm5 // low 4
vpunpckhbw ymm3, ymm3, ymm5 // high 4
vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4
vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4
vpmulhuw ymm0, ymm0, ymm2 // src_argb * src_argb1 low 4
vpmulhuw ymm1, ymm1, ymm3 // src_argb * src_argb1 high 4
vpackuswb ymm0, ymm0, ymm1
vmovdqu [edx], ymm0
lea edx, [edx + 32]
@ -5094,19 +5094,19 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
#ifdef HAS_ARGBADDROW_AVX2
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
convertloop:
vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
vmovdqu ymm0, [eax] // read 8 pixels from src_argb
lea eax, [eax + 32]
vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1
lea esi, [esi + 32]
@ -5124,21 +5124,21 @@ __declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
#ifdef HAS_ARGBSUBTRACTROW_AVX2
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_argb0
mov eax, [esp + 4 + 4] // src_argb
mov esi, [esp + 4 + 8] // src_argb1
mov edx, [esp + 4 + 12] // dst_argb
mov ecx, [esp + 4 + 16] // width
convertloop:
vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
vmovdqu ymm0, [eax] // read 8 pixels from src_argb
lea eax, [eax + 32]
vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1
vpsubusb ymm0, ymm0, [esi] // src_argb - src_argb1
lea esi, [esi + 32]
vmovdqu [edx], ymm0
lea edx, [edx + 32]


@ -1446,7 +1446,8 @@ void ScalePlaneUp2_Bilinear(int src_width,
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
// TODO: Test performance of writing one row of destination at a time.
// TODO(fbarchard): Test performance of writing one row of destination at a
// time.
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {


@ -746,7 +746,8 @@ void ScaleUVBilinearUp2(int src_width,
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
// TODO: Test performance of writing one row of destination at a time.
// TODO(fbarchard): Test performance of writing one row of destination at a
// time.
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {
@ -851,7 +852,8 @@ void ScaleUVBilinearUp2_16(int src_width,
for (x = 0; x < src_height - 1; ++x) {
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
src_ptr += src_stride;
// TODO: Test performance of writing one row of destination at a time.
// TODO(fbarchard): Test performance of writing one row of destination at a
// time.
dst_ptr += 2 * dst_stride;
}
if (!(dst_height & 1)) {


@ -2404,8 +2404,7 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
}
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(dst_pixels_opt, kPixels);
align_buffer_page_end(dst_pixels_c, kPixels);
@ -2433,8 +2432,7 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
}
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(orig_pixels, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 4);
align_buffer_page_end(dst_pixels_c, kPixels * 4);
@ -2567,35 +2565,25 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels);
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_u, kPixels);
align_buffer_page_end(src_pixels_v, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
MemRandomize(src_pixels, kPixels * 2);
MemRandomize(tmp_pixels_u, kPixels);
MemRandomize(tmp_pixels_v, kPixels);
MemRandomize(src_pixels_u, kPixels);
MemRandomize(src_pixels_v, kPixels);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
tmp_pixels_v, benchmark_width_, benchmark_width_,
benchmark_height_);
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
tmp_pixels_v, benchmark_width_, benchmark_width_,
benchmark_height_);
for (int i = 0; i < benchmark_iterations_; ++i) {
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
}
@ -2604,119 +2592,88 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(tmp_pixels_u);
free_aligned_buffer_page_end(tmp_pixels_v);
free_aligned_buffer_page_end(src_pixels_u);
free_aligned_buffer_page_end(src_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
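The test now feeds MergeUVPlane randomized planes directly instead of round-tripping through SplitUVPlane first. The function under test is a plain interleave; roughly, under the usual NV12-style layout:

#include <stdint.h>

// Reference semantics of MergeUVPlane (sketch): interleave the U and V
// planes into one NV12-style UV plane, row by row.
static void MergeUVPlane_Sketch(const uint8_t* src_u, int src_stride_u,
                                const uint8_t* src_v, int src_stride_v,
                                uint8_t* dst_uv, int dst_stride_uv,
                                int width, int height) {
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      dst_uv[2 * x + 0] = src_u[x];
      dst_uv[2 * x + 1] = src_v[x];
    }
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}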
// 16 bit channel split and merge
TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2 * 2);
align_buffer_page_end(tmp_pixels_u_c, kPixels * 2);
align_buffer_page_end(tmp_pixels_v_c, kPixels * 2);
align_buffer_page_end(tmp_pixels_u_opt, kPixels * 2);
align_buffer_page_end(tmp_pixels_v_opt, kPixels * 2);
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
MemRandomize(src_pixels, kPixels * 2 * 2);
MemRandomize(tmp_pixels_u_c, kPixels * 2);
MemRandomize(tmp_pixels_v_c, kPixels * 2);
MemRandomize(tmp_pixels_u_opt, kPixels * 2);
MemRandomize(tmp_pixels_v_opt, kPixels * 2);
MemRandomize(src_pixels_u, kPixels * 2);
MemRandomize(src_pixels_v, kPixels * 2);
MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
MemRandomize(dst_pixels_c, kPixels * 2 * 2);
MaskCpuFlags(disable_cpu_flags_);
SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
(uint16_t*)tmp_pixels_u_c, benchmark_width_,
(uint16_t*)tmp_pixels_v_c, benchmark_width_, benchmark_width_,
benchmark_height_, 12);
MergeUVPlane_16((const uint16_t*)tmp_pixels_u_c, benchmark_width_,
(const uint16_t*)tmp_pixels_v_c, benchmark_width_,
MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
(const uint16_t*)src_pixels_v, benchmark_width_,
(uint16_t*)dst_pixels_c, benchmark_width_ * 2,
benchmark_width_, benchmark_height_, 12);
MaskCpuFlags(benchmark_cpu_info_);
SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
(uint16_t*)tmp_pixels_u_opt, benchmark_width_,
(uint16_t*)tmp_pixels_v_opt, benchmark_width_,
benchmark_width_, benchmark_height_, 12);
for (int i = 0; i < benchmark_iterations_; ++i) {
MergeUVPlane_16((const uint16_t*)tmp_pixels_u_opt, benchmark_width_,
(const uint16_t*)tmp_pixels_v_opt, benchmark_width_,
MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
(const uint16_t*)src_pixels_v, benchmark_width_,
(uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
benchmark_width_, benchmark_height_, 12);
}
for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(tmp_pixels_u_c[i], tmp_pixels_u_opt[i]);
EXPECT_EQ(tmp_pixels_v_c[i], tmp_pixels_v_opt[i]);
}
for (int i = 0; i < kPixels * 2 * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(tmp_pixels_u_c);
free_aligned_buffer_page_end(tmp_pixels_v_c);
free_aligned_buffer_page_end(tmp_pixels_u_opt);
free_aligned_buffer_page_end(tmp_pixels_v_opt);
free_aligned_buffer_page_end(src_pixels_u);
free_aligned_buffer_page_end(src_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
align_buffer_page_end(dst_pixels_u_c, kPixels);
align_buffer_page_end(dst_pixels_v_c, kPixels);
align_buffer_page_end(dst_pixels_u_opt, kPixels);
align_buffer_page_end(dst_pixels_v_opt, kPixels);
MemRandomize(src_pixels, kPixels * 2);
MemRandomize(tmp_pixels_u, kPixels);
MemRandomize(tmp_pixels_v, kPixels);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MemRandomize(dst_pixels_u_c, kPixels);
MemRandomize(dst_pixels_v_c, kPixels);
MemRandomize(dst_pixels_u_opt, kPixels);
MemRandomize(dst_pixels_v_opt, kPixels);
MaskCpuFlags(disable_cpu_flags_);
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
tmp_pixels_v, benchmark_width_, benchmark_width_,
benchmark_height_);
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_c,
benchmark_width_, dst_pixels_v_c, benchmark_width_,
benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u,
benchmark_width_, tmp_pixels_v, benchmark_width_,
SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_opt,
benchmark_width_, dst_pixels_v_opt, benchmark_width_,
benchmark_width_, benchmark_height_);
}
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
benchmark_height_);
for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(tmp_pixels_u);
free_aligned_buffer_page_end(tmp_pixels_v);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
free_aligned_buffer_page_end(dst_pixels_u_c);
free_aligned_buffer_page_end(dst_pixels_v_c);
free_aligned_buffer_page_end(dst_pixels_u_opt);
free_aligned_buffer_page_end(dst_pixels_v_opt);
}
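Likewise, the split test now compares the C and optimized SplitUVPlane outputs directly. The operation is the inverse of the merge sketched above:

#include <stdint.h>

// Reference semantics of SplitUVPlane (sketch): deinterleave an
// NV12-style UV plane back into separate U and V planes.
static void SplitUVPlane_Sketch(const uint8_t* src_uv, int src_stride_uv,
                                uint8_t* dst_u, int dst_stride_u,
                                uint8_t* dst_v, int dst_stride_v,
                                int width, int height) {
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      dst_u[x] = src_uv[2 * x + 0];
      dst_v[x] = src_uv[2 * x + 1];
    }
    src_uv += src_stride_uv;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
}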
// 16 bit channel split
TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 2 * 2);
align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
@ -2755,7 +2712,7 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
@ -2785,7 +2742,7 @@ TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@ -2834,7 +2791,7 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@ -2881,8 +2838,7 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@ -2936,8 +2892,7 @@ TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@ -2991,8 +2946,7 @@ TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@ -3042,8 +2996,7 @@ TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@ -3091,30 +3044,29 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c);
}
// Merge 4 channels
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kPixels = (kWidth * benchmark_height_ + 15) & ~15; \
const int kPixels = kWidth * benchmark_height_; \
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
for (int i = 0; i < kPixels; ++i) { \
src_pixels_r[i] = fastrand() & 65535; \
src_pixels_g[i] = fastrand() & 65535; \
src_pixels_b[i] = fastrand() & 65535; \
src_pixels_a[i] = fastrand() & 65535; \
} \
memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
MaskCpuFlags(disable_cpu_flags_); \
FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \
@ -3136,27 +3088,26 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
free_aligned_buffer_page_end(dst_memory_opt); \
}
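
The per-element fastrand() loops give way to MemRandomize over the raw source bytes, before the pointers are reinterpret_cast to the sample type. A sketch of a MemRandomize-style helper, modeled on libyuv's test utility (an assumption; fastrand() is the test suite's pseudo-random generator):

#include <cstdint>
extern int fastrand();  // assumed: the unit tests' PRNG

// Fill len bytes with pseudo-random data, two bytes per fastrand() call.
static void MemRandomizeSketch(uint8_t* dst, int64_t len) {
  int64_t i = 0;
  for (; i + 1 < len; i += 2) {
    int r = fastrand();
    dst[i] = static_cast<uint8_t>(r);
    dst[i + 1] = static_cast<uint8_t>(r >> 8);
  }
  if (i < len) {
    dst[i] = static_cast<uint8_t>(fastrand());
  }
}
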
// Merge 3 channel RGB into 4 channel XRGB with opaque alpha
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
-const int kPixels = (kWidth * benchmark_height_ + 15) & ~15; \
+const int kPixels = kWidth * benchmark_height_; \
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
+memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
+memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
-for (int i = 0; i < kPixels; ++i) { \
-src_pixels_r[i] = fastrand() & 65535; \
-src_pixels_g[i] = fastrand() & 65535; \
-src_pixels_b[i] = fastrand() & 65535; \
-} \
-memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
-memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
MaskCpuFlags(disable_cpu_flags_); \
FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \
@ -3177,6 +3128,7 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
free_aligned_buffer_page_end(dst_memory_opt); \
}
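
In the opaque variant the alpha plane is passed as NULL with a zero stride, which MergeARGBPlane treats as "write fully opaque alpha". A simplified per-row model of that behavior (a sketch, not the shipped kernel):

#include <cstdint>

// Merge planar R, G, B (and optional A) into interleaved ARGB bytes.
// libyuv's ARGB is B,G,R,A in memory (little-endian ARGB word).
static void MergeARGBRowSketch(const uint8_t* src_r, const uint8_t* src_g,
                               const uint8_t* src_b, const uint8_t* src_a,
                               uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = src_b[i];
    dst_argb[4 * i + 1] = src_g[i];
    dst_argb[4 * i + 2] = src_r[i];
    dst_argb[4 * i + 3] = src_a ? src_a[i] : 255;  // opaque when A is NULL
  }
}
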
// TODO(fbarchard): fix bug and change to benchmark_width - 1
#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ - 4, _Any, +, 0) \
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
@ -3206,16 +3158,14 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
+MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
+MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
+MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
-for (int i = 0; i < kPixels; ++i) { \
-src_pixels_r[i] = fastrand() & 65535; \
-src_pixels_g[i] = fastrand() & 65535; \
-src_pixels_b[i] = fastrand() & 65535; \
-} \
memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
MaskCpuFlags(disable_cpu_flags_); \
@ -3238,13 +3188,13 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
free_aligned_buffer_page_end(dst_memory_opt); \
}
// TODO(fbarchard): Fix MergeXR30 and change _any to width - 1
#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ - 4, _Any, +, 0) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
1) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
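
Each invocation below stamps out four tests: _Any uses a width that is not a multiple of the SIMD step (benchmark_width_ - 4), _Unaligned offsets the source buffers by one byte (OFF = 1), _Invert negates the sign applied by NEG, and _Opt runs the aligned fast path. The negated value presumably follows libyuv's usual negative-height flip convention, sketched here:

#include <cstdint>

// libyuv convention (sketch): a negative height selects bottom-up
// processing - start at the last row and walk the stride backwards.
static const uint8_t* ApplyNegativeHeight(const uint8_t* src,
                                          int* src_stride, int* height) {
  if (*height < 0) {
    *height = -*height;
    src += static_cast<int64_t>(*height - 1) * (*src_stride);
    *src_stride = -*src_stride;
  }
  return src;
}
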
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
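
MergeXR30 packs three high-bit-depth planes into the 2:10:10:10 AR30 layout (B in the low bits, 2-bit alpha on top); for 12- and 16-bit sources the samples are presumably scaled down to 10 bits first. A per-pixel packing sketch:

#include <cstdint>

// Pack 10-bit R, G, B into one little-endian AR30 word with opaque alpha.
static uint32_t PackXR30Sketch(uint32_t r10, uint32_t g10, uint32_t b10) {
  return (3u << 30) | (r10 << 20) | (g10 << 10) | b10;
}
// Example: white (1023, 1023, 1023) -> 0xFFFFFFFF.
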
@ -3254,6 +3204,7 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
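
MergeUVRow_16 interleaves 16-bit U and V samples into a UV plane while scaling each value (e.g. a scale of 64 promotes 10-bit data to msb-aligned 16-bit). A C model of the expected behavior, assuming the (src_u, src_v, dst_uv, scale, width) signature used by the row functions of this era:

#include <cstdint>

// Interleave and scale 16-bit U/V samples: dst is U0,V0,U1,V1,...
static void MergeUVRow16Sketch(const uint16_t* src_u, const uint16_t* src_v,
                               uint16_t* dst_uv, int scale, int width) {
  for (int x = 0; x < width; ++x) {
    dst_uv[2 * x + 0] = static_cast<uint16_t>(src_u[x] * scale);
    dst_uv[2 * x + 1] = static_cast<uint16_t>(src_v[x] * scale);
  }
}
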
@ -3299,6 +3250,7 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
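
MultiplyRow_16 scales each 16-bit sample by a constant, the building block for lsb-to-msb bit-depth promotion (e.g. 10-bit values times 64). A sketch of that contract:

#include <cstdint>

// Multiply each 16-bit sample by scale; the product wraps modulo 65536.
static void MultiplyRow16Sketch(const uint16_t* src_y, uint16_t* dst_y,
                                int scale, int width) {
  for (int x = 0; x < width; ++x) {
    dst_y[x] = static_cast<uint16_t>(src_y[x] * scale);
  }
}
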
@ -3334,8 +3286,7 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
#endif // HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
-// Round count up to multiple of 16
-const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels);
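
Convert16To8Plane narrows 16-bit samples to 8 bits through a 16.16 fixed-point scale (16384 maps 10-bit input to full-range 8-bit; 256 maps 16-bit input). A per-row sketch under that assumption:

#include <cstdint>

static uint8_t Clamp255Sketch(int v) {
  return static_cast<uint8_t>(v < 0 ? 0 : (v > 255 ? 255 : v));
}

// dst = (src * scale) >> 16, clamped to [0, 255].
static void Convert16To8RowSketch(const uint16_t* src_y, uint8_t* dst_y,
                                  int scale, int width) {
  for (int x = 0; x < width; ++x) {
    dst_y[x] = Clamp255Sketch((src_y[x] * scale) >> 16);
  }
}
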
@ -3414,8 +3365,7 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
#endif // ENABLE_ROW_TESTS
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
-// Round count up to multiple of 16
-const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+const int kPixels = benchmark_width_ * benchmark_height_;
align_buffer_page_end(src_pixels_y, kPixels);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
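
Convert8To16Plane widens 8-bit samples, with the scale again encoding the target depth (1024 = 10 bits, 4096 = 12 bits, 65536 = 16 bits). Replicating the byte into 16 bits and shifting down to the target depth is an equivalent model; a sketch with the depth written out explicitly (the shipped API takes the fixed-point scale instead):

#include <cstdint>

// Promote 8-bit samples to `depth` bits (10/12/16): replicate the byte
// (0xAB -> 0xABAB), then shift down, so 255 -> 1023 at depth 10.
static void Convert8To16RowSketch(const uint8_t* src_y, uint16_t* dst_y,
                                  int depth, int width) {
  for (int x = 0; x < width; ++x) {
    uint16_t v = static_cast<uint16_t>(src_y[x] * 0x0101);
    dst_y[x] = v >> (16 - depth);
  }
}
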