From 2b9c21080362d7f1b45f7465847e939b1d270583 Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com"
Date: Thu, 22 Mar 2012 22:36:44 +0000
Subject: [PATCH] lint fix test

BUG=none
TEST=gcl lint
lintfix2

Review URL: https://webrtc-codereview.appspot.com/458003

git-svn-id: http://libyuv.googlecode.com/svn/trunk@220 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium               |   2 +-
 include/libyuv/version.h      |   2 +-
 include/libyuv/video_common.h |   7 ++-
 source/compare.cc             |   9 ++-
 source/convert.cc             |  72 ++++++++++-------------
 source/convert_from.cc        |   6 +-
 source/cpu_id.cc              |  10 ++--
 source/format_conversion.cc   |   4 +-
 source/planar_functions.cc    |   8 +--
 source/rotate.cc              |  10 ++--
 source/rotate_neon.cc         |   6 +-
 source/row_common.cc          |   2 +-
 source/row_neon.cc            |  16 ++---
 source/row_posix.cc           | 106 +++++++++++++++++-----------------
 source/row_win.cc             |   2 +-
 source/scale.cc               |  62 ++++++++++----------
 unit_test/compare_test.cc     |  11 ++--
 17 files changed, 161 insertions(+), 174 deletions(-)

diff --git a/README.chromium b/README.chromium
index fafe8d8f6..1b918932f 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 219
+Version: 220
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index bdbe8e75a..39bce659f 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 219
+#define LIBYUV_VERSION 220
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
 
diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h
index 33de6912d..8d1f96fd7 100644
--- a/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
@@ -25,6 +25,7 @@ extern "C" {
 //////////////////////////////////////////////////////////////////////////////
 // Definition of FourCC codes
 //////////////////////////////////////////////////////////////////////////////
+
 // Convert four characters to a FourCC code.
 // Needs to be a macro otherwise the OS X compiler complains when the kFormat*
 // constants are used in a switch.
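The hunk above only adds a blank line before the FOURCC comment block, but the comment it sits next to is worth unpacking: FOURCC() has to stay a macro rather than an inline function so that FOURCC_ARGB and friends remain integral constant expressions usable as switch-case labels. A minimal sketch of what such a packing macro looks like, assuming the first character lands in the low byte; the real definition in video_common.h may differ in casts and parenthesization:

    #include "libyuv/basic_types.h"  // uint32

    // Hypothetical sketch only; see video_common.h for the actual definition.
    // Packs four characters into a uint32, first character in the low byte.
    #define FOURCC(a, b, c, d)                                      \
      ((static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) |   \
       (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))

    // Because the expansion is an integral constant expression, values such
    // as FOURCC('A', 'R', 'G', 'B') can appear directly as case labels:
    //   switch (fourcc) {
    //     case FOURCC('A', 'R', 'G', 'B'): /* handle ARGB */ break;
    //   }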
@@ -57,9 +58,9 @@ enum FourCC { FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), - FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565 - FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555 - FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 + FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565 + FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555 + FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), diff --git a/source/compare.cc b/source/compare.cc index 1da3dc4ab..998641cf6 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -18,7 +18,7 @@ #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -42,7 +42,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { volatile uint32 sse; - asm volatile ( + asm volatile( "vmov.u8 q7, #0 \n" "vmov.u8 q9, #0 \n" "vmov.u8 q8, #0 \n" @@ -71,8 +71,7 @@ static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, "+r"(count), "=r"(sse) : - : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10" - ); + : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"); return sse; } @@ -122,7 +121,7 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { uint32 sse; - asm volatile ( + asm volatile( "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm5,%%xmm5 \n" "sub %0,%1 \n" diff --git a/source/convert.cc b/source/convert.cc index fab5b16b9..7a9cdf282 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -19,7 +19,7 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/video_common.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, movdqa xmm0, [eax] pavgb xmm0, [eax + edx] sub ecx, 16 - movdqa [eax + edi], xmm0 + movdqa [eax + edi], xmm0 // NOLINT lea eax, [eax + 16] jg convertloop pop edi @@ -86,11 +86,11 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) #define HAS_HALFROW_SSE2 static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, uint8* dst_uv, int pix) { - asm volatile ( + asm volatile( "sub %0,%1 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -137,7 +137,7 @@ int I422ToI420(const uint8* src_y, int src_stride_y, } int halfwidth = (width + 1) >> 1; void (*HalfRow)(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); + uint8* dst_uv, int pix) = HalfRow_C; #if defined(HAS_HALFROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && @@ -146,11 +146,8 @@ int I422ToI420(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { HalfRow = HalfRow_SSE2; - } else -#endif - { - HalfRow = HalfRow_C; } +#endif // Copy Y plane if (dst_y) { @@ -186,7 +183,7 @@ int I422ToI420(const uint8* src_y, int src_stride_y, #define HAS_SCALEROWDOWN2_NEON void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, 
uint8* dst, int dst_width); -#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ +#elif defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) && \ !defined(YUV_DISABLE_ASM) void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width); @@ -213,14 +210,13 @@ int I444ToI420(const uint8* src_y, int src_stride_y, } int halfwidth = (width + 1) >> 1; void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); + uint8* dst_ptr, int dst_width) = ScaleRowDown2Int_C; #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { ScaleRowDown2 = ScaleRowDown2Int_NEON; - } else -#endif -#if defined(HAS_SCALEROWDOWN2_SSE2) + } +#elif defined(HAS_SCALEROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && @@ -228,10 +224,8 @@ int I444ToI420(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { ScaleRowDown2 = ScaleRowDown2Int_SSE2; -#endif - { - ScaleRowDown2 = ScaleRowDown2Int_C; } +#endif // Copy Y plane if (dst_y) { @@ -395,11 +389,12 @@ static int X420ToI420(const uint8* src_y, } int halfwidth = (width + 1) >> 1; - void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); + void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = + SplitUV_C; #if defined(HAS_SPLITUV_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { SplitUV = SplitUV_NEON; - } else + } #elif defined(HAS_SPLITUV_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && @@ -407,11 +402,8 @@ static int X420ToI420(const uint8* src_y, IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { SplitUV = SplitUV_SSE2; - } else -#endif - { - SplitUV = SplitUV_C; } +#endif if (dst_y) { CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, @@ -483,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, pand xmm2, xmm5 // even bytes are Y pand xmm3, xmm5 packuswb xmm2, xmm3 - movdqa [edx], xmm2 + movdqa [edx], xmm2 // NOLINT lea edx, [edx + 16] psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 @@ -491,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - movq qword ptr [esi], xmm0 + movq qword ptr [esi], xmm0 // NOLINT lea esi, [esi + 8] psrlw xmm1, 8 // V packuswb xmm1, xmm1 sub ecx, 16 - movq qword ptr [edi], xmm1 + movq qword ptr [edi], xmm1 // NOLINT lea edi, [edi + 8] jg convertloop @@ -506,11 +498,11 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) #define HAS_SPLITYUY2_SSE2 static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -604,19 +596,17 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, } #endif - void (*SplitYUY2)(const uint8* src_yuy2, - uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix); + void (*SplitYUY2)(const uint8* src_yuy2, uint8* dst_y, uint8* dst_u, + uint8* dst_v, int pix) = SplitYUY2_C; #if defined(HAS_SPLITYUY2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && IS_ALIGNED(src_yuy2, 16) && 
IS_ALIGNED(src_stride_yuy2, 16) && IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { SplitYUY2 = SplitYUY2_SSE2; - } else -#endif - { - SplitYUY2 = SplitYUY2_C; } +#endif + for (int y = 0; y < height; y += 2) { CopyRow(src_y, dst_y, width); dst_y += dst_stride_y; @@ -800,13 +790,13 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, #endif #ifdef LIBYUV_LITTLE_ENDIAN -#define READWORD(p) (*((uint32*) (p))) +#define READWORD(p) (*reinterpret_cast(p)) #else static inline uint32 READWORD(const uint8* p) { - return (uint32) p[0] | - ((uint32) (p[1]) << 8) | - ((uint32) (p[2]) << 16) | - ((uint32) (p[3]) << 24); + return static_cast(p[0]) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24); } #endif @@ -1599,7 +1589,7 @@ int ConvertToI420(const uint8* sample, size_t sample_size, int tmp_y_stride = y_stride; int tmp_u_stride = u_stride; int tmp_v_stride = v_stride; - uint8* buf = 0; + uint8* buf = NULL; int abs_dst_height = (dst_height < 0) ? -dst_height : dst_height; if (need_rot) { int y_size = dst_width * abs_dst_height; @@ -1618,7 +1608,7 @@ int ConvertToI420(const uint8* sample, size_t sample_size, switch (format) { // Single plane formats case FOURCC_YUY2: - src = sample + (aligned_src_width * crop_y + crop_x) * 2 ; + src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = YUY2ToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, diff --git a/source/convert_from.cc b/source/convert_from.cc index 7e41e2f8b..069be7d7e 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -17,7 +17,7 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/video_common.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -289,7 +289,7 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_frame, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movq (%1),%%xmm2 \n" @@ -324,7 +324,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_frame, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movq (%1),%%xmm2 \n" diff --git a/source/cpu_id.cc b/source/cpu_id.cc index aafbecda8..18a8572e1 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -24,21 +24,19 @@ // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux. 
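A recurring change in the convert.cc hunks above is how the per-row worker is chosen: instead of an if/else ladder whose trailing else assigned the C fallback, the function pointer is now initialized to the C implementation and the guarded block simply overrides it when the CPU flag and alignment checks pass. The sketch below illustrates that idiom with a hypothetical wrapper; HalfPlane is invented for the example, while HalfRow_C, HalfRow_SSE2, HAS_HALFROW_SSE2, TestCpuFlag and IS_ALIGNED are the names that appear in the diff:

    #include "libyuv/basic_types.h"  // uint8
    #include "libyuv/cpu_id.h"       // TestCpuFlag, kCpuHasSSE2
    #include "source/row.h"          // IS_ALIGNED; row helpers assumed visible

    // Hypothetical wrapper showing the "default to C, upgrade to SIMD" pattern.
    void HalfPlane(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int dst_uv_stride,
                   int halfwidth, int halfheight) {
      void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
                      uint8* dst_uv, int pix) = HalfRow_C;  // always-correct default
    #if defined(HAS_HALFROW_SSE2)
      if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) &&
          IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_uv_stride, 16) &&
          IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_uv_stride, 16)) {
        HalfRow = HalfRow_SSE2;  // override only when every precondition holds
      }
    #endif
      for (int y = 0; y < halfheight; ++y) {
        HalfRow(src_uv, src_uv_stride, dst_uv, halfwidth);
        src_uv += src_uv_stride * 2;  // HalfRow consumes two source rows
        dst_uv += dst_uv_stride;
      }
    }

The same shape appears for ScaleRowDown2, SplitUV and SplitYUY2 later in the file; keeping the C function as the unconditional default means each #if block can be compiled out entirely without ever leaving the pointer uninitialized.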
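The same file's UYVY/YUY2 helpers read packed pixels one 32-bit word at a time through READWORD, which the patch converts from C-style to C++-style casts. The template arguments of those casts are not legible above, so the version below is a reconstruction under the assumption that the little-endian path reads through a const uint32* and the portable path assembles the word low byte first:

    #include "libyuv/basic_types.h"  // uint8, uint32

    #ifdef LIBYUV_LITTLE_ENDIAN
    // Little-endian fast path: the four bytes already form the word in memory.
    #define READWORD(p) (*reinterpret_cast<const uint32*>(p))
    #else
    // Portable fallback: build the word explicitly, low byte first.
    static inline uint32 READWORD(const uint8* p) {
      return static_cast<uint32>(p[0]) |
             (static_cast<uint32>(p[1]) << 8) |
             (static_cast<uint32>(p[2]) << 16) |
             (static_cast<uint32>(p[3]) << 24);
    }
    #endif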
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) static __inline void __cpuid(int cpu_info[4], int info_type) { - asm volatile ( + asm volatile( "mov %%ebx, %%edi \n" "cpuid \n" "xchg %%edi, %%ebx \n" : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) - : "a"(info_type) - ); + : "a"(info_type)); } #elif defined(__i386__) || defined(__x86_64__) static __inline void __cpuid(int cpu_info[4], int info_type) { - asm volatile ( + asm volatile( "cpuid \n" : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) - : "a"(info_type) - ); + : "a"(info_type)); } #endif diff --git a/source/format_conversion.cc b/source/format_conversion.cc index 5cedf2a8e..692932bee 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -13,7 +13,7 @@ #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" #include "libyuv/video_common.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -53,7 +53,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb, #define HAS_ARGBTOBAYERROW_SSSE3 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { - asm volatile ( + asm volatile( "movd %3,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "1: \n" diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 07e6173c7..a6f7b66bc 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -13,7 +13,7 @@ #include // for memset() #include "libyuv/cpu_id.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -693,7 +693,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #define HAS_SETROW_NEON static void SetRow8_NEON(uint8* dst, uint32 v32, int count) { - asm volatile ( + asm volatile( "vdup.u32 q0, %2 \n" // duplicate 4 ints "1: \n" "subs %1, %1, #16 \n" // 16 bytes per loop @@ -763,7 +763,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, #define HAS_SETROW_X86 static void SetRow8_X86(uint8* dst, uint32 v32, int width) { size_t width_tmp = static_cast(width); - asm volatile ( + asm volatile( "shr $0x2,%1 \n" "rep stosl \n" : "+D"(dst), // %0 @@ -778,7 +778,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, for (int y = 0; y < height; ++y) { size_t width_tmp = static_cast(width); uint32* d = reinterpret_cast(dst); - asm volatile ( + asm volatile( "rep stosl \n" : "+D"(d), // %0 "+c"(width_tmp) // %1 diff --git a/source/rotate.cc b/source/rotate.cc index d62c36a7c..01c3ffcda 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -13,8 +13,8 @@ #include "libyuv/cpu_id.h" #include "libyuv/convert.h" #include "libyuv/planar_functions.h" -#include "rotate_priv.h" -#include "row.h" +#include "source/rotate_priv.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -295,7 +295,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, #define HAS_TRANSPOSE_WX8_SSSE3 static void TransposeWx8_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { - asm volatile ( + asm volatile( // Read in the data from the source pointer. // First round of bit swap. "1: \n" @@ -506,7 +506,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, #define HAS_TRANSPOSE_WX8_FAST_SSSE3 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { - asm volatile ( + asm volatile( // Read in the data from the source pointer. 
// First round of bit swap. "1: \n" @@ -646,7 +646,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int w) { - asm volatile ( + asm volatile( // Read in the data from the source pointer. // First round of bit swap. "1: \n" diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc index 0f01f02b2..f99c72e77 100644 --- a/source/rotate_neon.cc +++ b/source/rotate_neon.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #include "libyuv/basic_types.h" @@ -25,7 +25,7 @@ static const uvec8 vtbl_4x4_transpose = void TransposeWx8_NEON(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { - asm volatile ( + asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this @@ -191,7 +191,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width) { - asm volatile ( + asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this diff --git a/source/row_common.cc b/source/row_common.cc index 8b14b43d2..9b563f6bf 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #include "libyuv/basic_types.h" #include // For memcpy diff --git a/source/row_neon.cc b/source/row_neon.cc index 3ebebc113..bf4742cbc 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -61,7 +61,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "vld1.u8 {d24}, [%5] \n" "vld1.u8 {d25}, [%6] \n" "vmov.u8 d26, #128 \n" @@ -93,7 +93,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "vld1.u8 {d24}, [%5] \n" "vld1.u8 {d25}, [%6] \n" "vmov.u8 d26, #128 \n" @@ -126,7 +126,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "vld1.u8 {d24}, [%5] \n" "vld1.u8 {d25}, [%6] \n" "vmov.u8 d26, #128 \n" @@ -157,7 +157,7 @@ YUVTORGB // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "1: \n" "vld2.u8 {q0,q1}, [%0]! 
\n" // load 16 pairs of UV "subs %3, %3, #16 \n" // 16 processed per loop @@ -177,7 +177,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { #ifdef HAS_COPYROW_NEON // Copy multiple of 64 void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile ( + asm volatile( "1: \n" "pld [%0, #0xC0] \n" // preload "vldm %0!,{q0,q1,q2,q3} \n" // load 64 @@ -195,7 +195,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { #ifdef HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( + asm volatile( // compute where to start writing destination "add %1, %2 \n" // work on segments that are multiples of 16 @@ -270,7 +270,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROWUV_NEON void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { - asm volatile ( + asm volatile( // compute where to start writing destination "add %1, %3 \n" // dst_a + width "add %2, %3 \n" // dst_b + width diff --git a/source/row_posix.cc b/source/row_posix.cc index 92f090385..1d0d1cf1b 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #include "libyuv/basic_types.h" @@ -109,7 +109,7 @@ CONST uvec8 kShuffleMaskARGBToRAW = { }; void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0x18,%%xmm5 \n" "1: \n" @@ -138,7 +138,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { } void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm5 \n" "sub %0,%1 \n" "1: \n" @@ -161,7 +161,7 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) { } void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm5 \n" "sub %0,%1 \n" "1: \n" @@ -183,7 +183,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { } void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 "pslld $0x18,%%xmm5 \n" "movdqa %3,%%xmm4 \n" @@ -223,7 +223,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { } void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 "pslld $0x18,%%xmm5 \n" "movdqa %3,%%xmm4 \n" @@ -263,7 +263,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { } void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "mov $0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" @@ -312,7 +312,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "mov $0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" @@ -364,7 +364,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "mov $0xf0f0f0f,%%eax \n" "movd %%eax,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" @@ -403,7 +403,7 @@ void 
ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm6 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -443,7 +443,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { } void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm6 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -483,7 +483,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { } void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm3,%%xmm3 \n" "psrld $0x1b,%%xmm3 \n" "pcmpeqb %%xmm4,%%xmm4 \n" @@ -522,7 +522,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psrld $0x1b,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" @@ -565,7 +565,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psllw $0xc,%%xmm4 \n" "movdqa %%xmm4,%%xmm3 \n" @@ -596,7 +596,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -632,7 +632,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -674,7 +674,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { // and considered unsafe. 
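Every change in the row_posix.cc hunks above is the same whitespace-only adjustment to asm volatile, but it helps to keep in mind what these rows compute. ARGBToYRow and friends produce studio-swing (16..235) BT.601 luma; the scalar equivalent of the vectorized multiply-accumulate is roughly the following, where the coefficients are the conventional 8-bit fixed-point BT.601 weights rather than values quoted in this patch, so treat them as an assumption:

    #include "libyuv/basic_types.h"  // uint8

    // Fixed-point BT.601 luma: Y ~= 16 + 0.257*R + 0.504*G + 0.098*B.
    // 0x1080 folds together the +16 offset (16 << 8) and the +128 rounding term.
    static inline uint8 RGBToY(uint8 r, uint8 g, uint8 b) {
      return static_cast<uint8>((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
    }

The SSSE3 rows apply the same weights sixteen pixels at a time, using the constant coefficient and bias vectors loaded into xmm4/xmm5 at the top of each function.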
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -687,7 +687,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -738,7 +738,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -751,7 +751,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqu (%0),%%xmm0 \n" @@ -805,7 +805,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, } void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -841,7 +841,7 @@ void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { } void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -878,7 +878,7 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -891,7 +891,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -942,7 +942,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -955,7 +955,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqu (%0),%%xmm0 \n" @@ -1009,7 +1009,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, } void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -1045,7 +1045,7 @@ void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { } void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -1082,7 +1082,7 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -1095,7 +1095,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -1146,7 +1146,7 @@ void ABGRToUVRow_SSSE3(const uint8* 
src_abgr0, int src_stride_abgr, void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -1159,7 +1159,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqu (%0),%%xmm0 \n" @@ -1291,7 +1291,7 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1325,7 +1325,7 @@ void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1360,7 +1360,7 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1394,7 +1394,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1450,7 +1450,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, void YToARGBRow_SSE2(const uint8* y_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "pslld $0x18,%%xmm4 \n" "mov $0x10001000,%%eax \n" @@ -1501,7 +1501,7 @@ CONST uvec8 kShuffleMirror = { void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { intptr_t temp_width = static_cast(width); - asm volatile ( + asm volatile( "movdqa %3,%%xmm5 \n" "lea -0x10(%0),%0 \n" "1: \n" @@ -1526,7 +1526,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROW_SSE2 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { intptr_t temp_width = static_cast(width); - asm volatile ( + asm volatile( "lea -0x10(%0),%0 \n" "1: \n" "movdqu (%0,%2),%%xmm0 \n" @@ -1561,7 +1561,7 @@ CONST uvec8 kShuffleMirrorUV = { void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { intptr_t temp_width = static_cast(width); - asm volatile ( + asm volatile( "movdqa %4,%%xmm1 \n" "lea -16(%0,%3,2),%0 \n" "sub %1,%2 \n" @@ -1589,7 +1589,7 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, #ifdef HAS_SPLITUV_SSE2 void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1625,7 +1625,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { #ifdef HAS_COPYROW_SSE2 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { - asm volatile ( + asm volatile( "sub %0,%1 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -1650,7 +1650,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { #ifdef HAS_COPYROW_X86 void CopyRow_X86(const uint8* src, uint8* dst, int width) { size_t width_tmp = static_cast(width); - asm volatile ( + asm volatile( "shr $0x2,%2 \n" "rep movsl \n" : "+S"(src), // %0 @@ -1664,7 +1664,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) { #ifdef HAS_YUY2TOYROW_SSE2 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ 
-1691,7 +1691,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1730,7 +1730,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1759,7 +1759,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1797,7 +1797,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, } void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "1: \n" "movdqa (%0),%%xmm0 \n" "movdqa 0x10(%0),%%xmm1 \n" @@ -1822,7 +1822,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1861,7 +1861,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" @@ -1886,7 +1886,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1929,7 +1929,7 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, // Destination aligned to 16 bytes, multiple of 4 pixels void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0xf,%%xmm7 \n" "pcmpeqb %%xmm6,%%xmm6 \n" @@ -1999,7 +1999,7 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb, uint8* dst_argb, // Blend 1 pixel at a time, unaligned void ARGBBlendRow1_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0xf,%%xmm7 \n" "pcmpeqb %%xmm6,%%xmm6 \n" diff --git a/source/row_win.cc b/source/row_win.cc index 7007876a0..cdf4d2bcd 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #if defined(_M_IX86) #include "emmintrin.h" diff --git a/source/scale.cc b/source/scale.cc index f3d6d771c..436ac3990 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -16,7 +16,7 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" // For CopyPlane -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -59,7 +59,7 @@ void SetUseReferenceImpl(bool use) { #define HAS_SCALEROWDOWN2_NEON void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst, int dst_width) { - asm volatile ( + asm volatile( "1: \n" "vld2.u8 {q0,q1}, [%0]! \n" // load even pixels into q0, odd into q1 "vst1.u8 {q0}, [%1]! 
\n" // store even pixels @@ -75,7 +75,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst, int dst_width) { - asm volatile ( + asm volatile( "add %1, %0 \n" // change the stride to row 2 pointer "1: \n" "vld1.u8 {q0,q1}, [%0]! \n" // load row 1 and post increment @@ -101,7 +101,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_NEON static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "1: \n" "vld2.u8 {d0, d1}, [%0]! \n" "vtrn.u8 d1, d0 \n" @@ -120,7 +120,7 @@ static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "add r4, %0, %3 \n" "add r5, r4, %3 \n" "add %3, r5, %3 \n" @@ -159,7 +159,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, // Point samples 32 pixels to 24 pixels. static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "1: \n" "vld4.u8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vmov d2, d3 \n" // order needs to be d0, d1, d2 @@ -176,7 +176,7 @@ static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -231,7 +231,7 @@ static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -283,7 +283,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = // 32 -> 12 static void ScaleRowDown38_NEON(const uint8* src_ptr, int, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vld1.u8 {q3}, [%3] \n" "1: \n" "vld1.u8 {d0, d1, d2, d3}, [%0]! 
\n" @@ -304,7 +304,7 @@ static void ScaleRowDown38_NEON(const uint8* src_ptr, int, // 32x3 -> 12x1 static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "vld1.u8 {q15}, [%6] \n" @@ -413,7 +413,7 @@ static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, // 32x2 -> 12x1 static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "add %3, %0 \n" @@ -508,7 +508,7 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleFilterRows_NEON(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { - asm volatile ( + asm volatile( "cmp %4, #0 \n" "beq 2f \n" "add %2, %1 \n" @@ -1555,7 +1555,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr, #define HAS_SCALEROWDOWN2_SSE2 static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1579,7 +1579,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1614,7 +1614,7 @@ static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_SSE2 static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrld $0x18,%%xmm5 \n" "1: \n" @@ -1640,7 +1640,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { intptr_t temp = 0; - asm volatile ( + asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0x8,%%xmm7 \n" "lea (%4,%4,2),%3 \n" @@ -1693,7 +1693,7 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN8_SSE2 static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlq $0x38,%%xmm5 \n" "1: \n" @@ -1722,7 +1722,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, uint16* dst_ptr, int src_width, int src_height) { int tmp_height = 0; intptr_t tmp_src = 0; - asm volatile ( + asm volatile( "pxor %%xmm4,%%xmm4 \n" "sub $0x1,%5 \n" "1: \n" @@ -2263,7 +2263,7 @@ extern "C" void ScaleFilterRows_SSSE3(uint8* dst_ptr, #elif defined(__x86_64__) static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "lea (%3,%3,2),%%r10 \n" "pxor %%xmm7,%%xmm7 \n" "1:" @@ -2322,7 +2322,7 @@ static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN34_SSSE3 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%3),%%xmm3 \n" "movdqa (%4),%%xmm4 \n" "movdqa (%5),%%xmm5 \n" @@ -2353,7 +2353,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int 
src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm2 \n" // _shuf01 "movdqa (%5),%%xmm3 \n" // _shuf11 "movdqa (%6),%%xmm4 \n" // _shuf21 @@ -2410,7 +2410,7 @@ static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm2 \n" // _shuf01 "movdqa (%5),%%xmm3 \n" // _shuf11 "movdqa (%6),%%xmm4 \n" // _shuf21 @@ -2471,7 +2471,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN38_SSSE3 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%3),%%xmm4 \n" "movdqa (%4),%%xmm5 \n" "1:" @@ -2498,7 +2498,7 @@ static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm4 \n" "movdqa (%5),%%xmm5 \n" "movdqa (%6),%%xmm6 \n" @@ -2555,7 +2555,7 @@ static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm4 \n" "movdqa (%5),%%xmm5 \n" "movdqa (%6),%%xmm6 \n" @@ -2597,7 +2597,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { if (source_y_fraction == 0) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "lea 0x10(%1),%1 \n" @@ -2615,7 +2615,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, ); return; } else if (source_y_fraction == 128) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "movdqa (%1,%3,1),%%xmm2 \n" @@ -2635,7 +2635,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, ); return; } else { - asm volatile ( + asm volatile( "mov %3,%%eax \n" "movd %%eax,%%xmm6 \n" "punpcklwd %%xmm6,%%xmm6 \n" @@ -2688,7 +2688,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { if (source_y_fraction <= 1) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "lea 0x10(%1),%1 \n" @@ -2706,7 +2706,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, ); return; } else if (source_y_fraction == 128) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "movdqa (%1,%3,1),%%xmm2 \n" @@ -2726,7 +2726,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, ); return; } else { - asm volatile ( + asm volatile( "mov %3,%%eax \n" "shr %%eax \n" "mov %%al,%%ah \n" diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc index 52730e249..4f341521f 100644 --- a/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc @@ -8,12 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "unit_test.h" - #include #include #include +#include "unit_test/unit_test.h" #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/cpu_id.h" @@ -153,7 +152,7 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) { _benchmark_width, _benchmark_height); c_time = (get_time() - c_time) / _benchmark_iterations; - printf ("BenchmarkPsnr_C - %8d us c\n", (int)(c_time*1e6)); + printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6); MaskCpuFlags(-1); @@ -176,7 +175,7 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) { _benchmark_width, _benchmark_height); opt_time = (get_time() - opt_time) / _benchmark_iterations; - printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6)); + printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6); EXPECT_EQ(0, 0); @@ -274,7 +273,7 @@ TEST_F(libyuvTest, BenchmarkSsim_C) { _benchmark_width, _benchmark_height); c_time = (get_time() - c_time) / _benchmark_iterations; - printf ("BenchmarkSsim_C - %8d us c\n", (int)(c_time*1e6)); + printf("BenchmarkSsim_C - %8.2f us c\n", c_time * 1e6); MaskCpuFlags(-1); @@ -297,7 +296,7 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) { _benchmark_width, _benchmark_height); opt_time = (get_time() - opt_time) / _benchmark_iterations; - printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6)); + printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6); EXPECT_EQ(0, 0);
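Looking back at the scale.cc hunks, the row scalers being reformatted implement fairly simple filters once the NEON/SSE2 packaging is stripped away. ScaleRowDown2Int, for example, is a 2x2 box filter: each output pixel is the rounded average of a 2x2 block spanning two adjacent source rows. A scalar sketch of that operation follows; the function name and the exact rounding constant are assumptions, since the real C reference lives in scale.cc outside the hunks shown:

    #include "libyuv/basic_types.h"  // uint8

    // Hypothetical scalar equivalent of ScaleRowDown2Int: average 2x2 blocks.
    static void ScaleRowDown2Box_C(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
      const uint8* s = src_ptr;
      const uint8* t = src_ptr + src_stride;  // second source row
      for (int x = 0; x < dst_width; ++x) {
        dst_ptr[x] = static_cast<uint8>((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
        s += 2;
        t += 2;
      }
    }

This matches the setup visible in ScaleRowDown2Int_NEON above, where the stride is added to the source pointer before the loop to form the second-row pointer.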
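Finally, the compare_test.cc hunks do slightly more than whitespace cleanup: the benchmark printouts stop truncating the elapsed time to a whole number of microseconds through an int cast and print it as a float with two decimals instead. In sketch form, with variable names following the test and get_time() being the harness's own timer:

    #include <stdio.h>

    double c_time = get_time();
    // ... run _benchmark_iterations iterations of the code under test ...
    c_time = (get_time() - c_time) / _benchmark_iterations;

    // Old form dropped sub-microsecond precision:
    //   printf("BenchmarkPsnr_C - %8d us c\n", (int)(c_time * 1e6));
    // New form keeps it:
    printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6);

Note that the label printed by BenchmarkSsim_OPT still reads "BenchmarkPsnr_OPT"; this patch keeps the original string and only changes the format specifier.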