From 5b22506b14367f38c1e31d1aab4dc1132158737f Mon Sep 17 00:00:00 2001
From: fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90>
Date: Thu, 29 Mar 2012 02:19:26 +0000
Subject: [PATCH] With an asm()-aware lint, this cleans up most remaining
 issues.

BUG=none
TEST=lint filename
Review URL: https://webrtc-codereview.appspot.com/464001
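
For reference, the recurring patterns being fixed, shown on a hypothetical
helper (illustrative sketch only; ExampleClip and ExampleNop are invented
names, not code from this change):

  typedef long long example_int64;  // NOLINT - lint flags 'long long'
  #define EXAMPLE_ADD(a, b) ((a) + (b))  // spaces after macro-arg commas
  static inline unsigned int ExampleClip(int val) {
    return static_cast<unsigned int>(val < 0 ? 0 : val);  // no C-style cast
  }
  static inline void ExampleNop() {
    asm volatile (  // space between 'volatile' and '(' satisfies lint
      "nop                                     \n"
      ::: "memory");
  }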

git-svn-id: http://libyuv.googlecode.com/svn/trunk@230 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium                    |  2 +-
 include/libyuv/basic_types.h       | 14 +++----
 include/libyuv/compare.h           |  2 +-
 include/libyuv/convert.h           |  2 +-
 include/libyuv/convert_from.h      |  2 +-
 include/libyuv/format_conversion.h | 12 +++---
 include/libyuv/version.h           |  2 +-
 source/compare.cc                  |  6 +--
 source/convert.cc                  |  4 +-
 source/convert_from.cc             |  4 +-
 source/cpu_id.cc                   |  4 +-
 source/format_conversion.cc        |  2 +-
 source/planar_functions.cc         |  6 +--
 source/rotate.cc                   |  6 +--
 source/rotate_neon.cc              |  4 +-
 source/row.h                       |  2 +-
 source/row_common.cc               | 28 ++++++++------
 source/row_neon.cc                 | 14 +++---
 source/scale.cc                    | 60 +++++++++++++++---------------
 19 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/README.chromium b/README.chromium
index 207046502..f621b8893 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 229
+Version: 230
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/basic_types.h b/include/libyuv/basic_types.h
index 54c5f8a55..8c312b913 100644
--- a/include/libyuv/basic_types.h
+++ b/include/libyuv/basic_types.h
@@ -31,8 +31,8 @@ typedef __int64 int64;
 #define INT64_F "I64"
 #else  // COMPILER_MSVC
 #ifdef __LP64__
-typedef unsigned long uint64;
-typedef long int64;
+typedef unsigned long uint64;  // NOLINT
+typedef long int64;  // NOLINT
 #ifndef INT64_C
 #define INT64_C(x) x ## L
 #endif
@@ -41,8 +41,8 @@ typedef long int64;
 #endif
 #define INT64_F "l"
 #else  // __LP64__
-typedef unsigned long long uint64;
-typedef long long int64;
+typedef unsigned long long uint64;  // NOLINT
+typedef long long int64;  // NOLINT
 #ifndef INT64_C
 #define INT64_C(x) x ## LL
 #endif
@@ -54,8 +54,8 @@ typedef long long int64;
 #endif  // COMPILER_MSVC
 typedef unsigned int uint32;
 typedef int int32;
-typedef unsigned short uint16;
-typedef short int16;
+typedef unsigned short uint16;  // NOLINT
+typedef short int16;  // NOLINT
 typedef unsigned char uint8;
 typedef signed char int8;
 #endif  // INT_TYPES_DEFINED
@@ -70,4 +70,4 @@ typedef signed char int8;
 (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
     ((t)-1)) & ~((t)-1))))
 
-#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
+#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_
diff --git a/include/libyuv/compare.h b/include/libyuv/compare.h
index 3d31ecdde..1f78d2a9c 100644
--- a/include/libyuv/compare.h
+++ b/include/libyuv/compare.h
@@ -62,4 +62,4 @@ double I420Ssim(const uint8* src_y_a, int stride_y_a,
 }  // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_COMPARE_H_
+#endif  // INCLUDE_LIBYUV_COMPARE_H_
diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h
index a0d1cc032..ee114f079 100644
--- a/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@@ -215,4 +215,4 @@ int ConvertToI420(const uint8* src_frame, size_t src_size,
 }  // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_CONVERT_H_
+#endif  // INCLUDE_LIBYUV_CONVERT_H_
diff --git a/include/libyuv/convert_from.h b/include/libyuv/convert_from.h
index a50c931ed..34a1571ff 100644
--- a/include/libyuv/convert_from.h
+++ b/include/libyuv/convert_from.h
@@ -139,4 +139,4 @@ int ConvertFromI420(const uint8* y, int y_stride,
 }  // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_H_
diff --git a/include/libyuv/format_conversion.h b/include/libyuv/format_conversion.h
index 67e14dc7a..44e0f67fe 100644
--- a/include/libyuv/format_conversion.h
+++ b/include/libyuv/format_conversion.h
@@ -44,8 +44,8 @@ int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
                     int width, int height);
 
 // Temporary API mapper
-#define BayerRGBToI420(b,bs,f,y,ys,u,us,v,vs,w,h) \
-    BayerToI420(b,bs,y,ys,u,us,v,vs,w,h,f)
+#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
+    BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
 
 int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
                 uint8* dst_y, int dst_stride_y,
@@ -80,8 +80,8 @@ int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
                     int width, int height);
 
 // Temporary API mapper
-#define I420ToBayerRGB(y,ys,u,us,v,vs,b,bs,f,w,h) \
-    I420ToBayer(y,ys,u,us,v,vs,b,bs,w,h,f)
+#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
+    I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
 
 int I420ToBayer(const uint8* src_y, int src_stride_y,
                 const uint8* src_u, int src_stride_u,
@@ -108,7 +108,7 @@ int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
                     int width, int height);
 
 // Temporary API mapper
-#define BayerRGBToARGB(b,bs,f,a,as,w,h) BayerToARGB(b,bs,a,as,w,h,f)
+#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
 
 int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
                 uint8* dst_argb, int dst_stride_argb,
@@ -133,7 +133,7 @@ int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
                     int width, int height);
 
 // Temporary API mapper
-#define ARGBToBayerRGB(a,as,b,bs,f,w,h) ARGBToBayer(b,bs,a,as,w,h,f)
+#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
 
 int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_bayer, int dst_stride_bayer,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 58f9280b9..081edbd03 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define INCLUDE_LIBYUV_VERSION 229
+#define INCLUDE_LIBYUV_VERSION 230
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/compare.cc b/source/compare.cc
index 998641cf6..b1b88769f 100644
--- a/source/compare.cc
+++ b/source/compare.cc
@@ -42,7 +42,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
 static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
                                   int count) {
   volatile uint32 sse;
-  asm volatile(
+  asm volatile (
     "vmov.u8    q7, #0                       \n"
     "vmov.u8    q9, #0                       \n"
     "vmov.u8    q8, #0                       \n"
@@ -116,12 +116,12 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
     }
   }
 
-#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
 static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
                                   int count) {
   uint32 sse;
-  asm volatile(
+  asm volatile (
   "pxor       %%xmm0,%%xmm0                  \n"
   "pxor       %%xmm5,%%xmm5                  \n"
   "sub        %0,%1                          \n"
diff --git a/source/convert.cc b/source/convert.cc
index 5ad3e3222..b213ed0b3 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -90,7 +90,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 #define HAS_HALFROW_SSE2
 static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
                          uint8* dst_uv, int pix) {
-  asm volatile(
+  asm volatile (
   "sub        %0,%1                          \n"
 "1:                                          \n"
   "movdqa     (%0),%%xmm0                    \n"
@@ -502,7 +502,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
 #define HAS_SPLITYUY2_SSE2
 static void SplitYUY2_SSE2(const uint8* src_yuy2,
                            uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile(
+  asm volatile (
   "pcmpeqb    %%xmm5,%%xmm5                  \n"
   "psrlw      $0x8,%%xmm5                    \n"
 "1:                                          \n"
diff --git a/source/convert_from.cc b/source/convert_from.cc
index bf46b0acf..dc51a4fe7 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -289,7 +289,7 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
                                const uint8* src_u,
                                const uint8* src_v,
                                uint8* dst_frame, int width) {
-  asm volatile(
+  asm volatile (
   "sub        %1,%2                          \n"
 "1:                                          \n"
   "movq       (%1),%%xmm2                    \n"
@@ -324,7 +324,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
                                const uint8* src_u,
                                const uint8* src_v,
                                uint8* dst_frame, int width) {
-  asm volatile(
+  asm volatile (
   "sub        %1,%2                          \n"
 "1:                                          \n"
   "movq       (%1),%%xmm2                    \n"
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 18a8572e1..ab8a2c261 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -24,7 +24,7 @@
 // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-  asm volatile(
+  asm volatile (
     "mov %%ebx, %%edi                        \n"
     "cpuid                                   \n"
     "xchg %%edi, %%ebx                       \n"
@@ -33,7 +33,7 @@ static __inline void __cpuid(int cpu_info[4], int info_type) {
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-  asm volatile(
+  asm volatile (
     "cpuid                                   \n"
     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     : "a"(info_type));
diff --git a/source/format_conversion.cc b/source/format_conversion.cc
index 692932bee..7dfbfdb84 100644
--- a/source/format_conversion.cc
+++ b/source/format_conversion.cc
@@ -53,7 +53,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
 #define HAS_ARGBTOBAYERROW_SSSE3
 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                                  uint32 selector, int pix) {
-  asm volatile(
+  asm volatile (
   "movd       %3,%%xmm5                      \n"
   "pshufd     $0x0,%%xmm5,%%xmm5             \n"
 "1:                                          \n"
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index a24bcdacb..f5b834a8b 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -654,7 +654,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SETROW_NEON
 static void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
-  asm volatile(
+  asm volatile (
     "vdup.u32   q0, %2                       \n"  // duplicate 4 ints
   "1:                                        \n"
     "subs       %1, %1, #16                  \n"  // 16 bytes per loop
@@ -723,7 +723,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
 #define HAS_SETROW_X86
 static void SetRow8_X86(uint8* dst, uint32 v32, int width) {
   size_t width_tmp = static_cast<size_t>(width);
-  asm volatile(
+  asm volatile (
     "shr        $0x2,%1                      \n"
     "rep stosl                               \n"
   : "+D"(dst),       // %0
@@ -737,7 +737,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
   for (int y = 0; y < height; ++y) {
     size_t width_tmp = static_cast<size_t>(width);
     uint32* d = reinterpret_cast<uint32*>(dst);
-    asm volatile(
+    asm volatile (
       "rep stosl                             \n"
     : "+D"(d),         // %0
       "+c"(width_tmp)  // %1
diff --git a/source/rotate.cc b/source/rotate.cc
index 4a58689c8..4b11683c2 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -295,7 +295,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_SSSE3
 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                                uint8* dst, int dst_stride, int width) {
-  asm volatile(
+  asm volatile (
     // Read in the data from the source pointer.
     // First round of bit swap.
   "1:                                          \n"
@@ -506,7 +506,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_FAST_SSSE3
 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
                                     uint8* dst, int dst_stride, int width) {
-  asm volatile(
+  asm volatile (
     // Read in the data from the source pointer.
     // First round of bit swap.
   "1:                                          \n"
@@ -646,7 +646,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                 uint8* dst_a, int dst_stride_a,
                                 uint8* dst_b, int dst_stride_b,
                                 int w) {
-  asm volatile(
+  asm volatile (
     // Read in the data from the source pointer.
     // First round of bit swap.
   "1:                                          \n"
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index f99c72e77..0240fe12f 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -25,7 +25,7 @@ static const uvec8 vtbl_4x4_transpose =
 void TransposeWx8_NEON(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride,
                        int width) {
-  asm volatile(
+  asm volatile (
     // loops are on blocks of 8. loop will stop when
     // counter gets to or below 0. starting the counter
     // at w-8 allow for this
@@ -191,7 +191,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width) {
-  asm volatile(
+  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
    // at w-8 allow for this
diff --git a/source/row.h b/source/row.h
index 8c348dccc..32eefc308 100644
--- a/source/row.h
+++ b/source/row.h
@@ -87,7 +87,7 @@ extern "C" {
 typedef __declspec(align(16)) int8 vec8[16];
 typedef __declspec(align(16)) uint8 uvec8[16];
 typedef __declspec(align(16)) int16 vec16[8];
-#else // __GNUC__
+#else  // __GNUC__
 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
 typedef int8 __attribute__((vector_size(16))) vec8;
 typedef uint8 __attribute__((vector_size(16))) uvec8;
diff --git a/source/row_common.cc b/source/row_common.cc
index 9b563f6bf..bf4bc15fe 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -10,9 +10,10 @@
 
 #include "source/row.h"
 
-#include "libyuv/basic_types.h"
 #include <string.h>  // For memcpy
 
+#include "libyuv/basic_types.h"
+
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
@@ -195,7 +196,7 @@ static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
   return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
 }
 
-#define MAKEROWY(NAME,R,G,B) \
+#define MAKEROWY(NAME, R, G, B) \
 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {     \
   for (int x = 0; x < width; ++x) {                                          \
     dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);             \
@@ -229,9 +230,9 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,            \
   }                                                                          \
 }
 
-MAKEROWY(ARGB,2,1,0)
-MAKEROWY(BGRA,1,2,3)
-MAKEROWY(ABGR,0,1,2)
+MAKEROWY(ARGB, 2, 1, 0)
+MAKEROWY(BGRA, 1, 2, 3)
+MAKEROWY(ABGR, 0, 1, 2)
 
 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
   // Copy a Y to RGB.
@@ -263,11 +264,11 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
 
 static __inline uint32 Clip(int32 val) {
   if (val < 0) {
-    return (uint32) 0;
-  } else if (val > 255){
-    return (uint32) 255;
+    return static_cast<uint32>(0);
+  } else if (val > 255) {
+    return static_cast<uint32>(255);
   }
-  return (uint32) val;
+  return static_cast<uint32>(val);
 }
 
 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
@@ -469,7 +470,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
       dst_argb[2] = BLENDER(fr, br, a);
       dst_argb[3] = 255u;
     } else {
-      *(uint32*)dst_argb = *(uint32*)src_argb;
+      *reinterpret_cast<uint32*>(dst_argb) =
+          *reinterpret_cast<const uint32*>(src_argb);
     }
   }
   a = src_argb[4 + 3];
@@ -486,7 +488,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
       dst_argb[4 + 2] = BLENDER(fr, br, a);
       dst_argb[4 + 3] = 255u;
     } else {
-      *(uint32*)(dst_argb + 4) = *(uint32*)(src_argb + 4);
+      *reinterpret_cast<uint32*>(dst_argb + 4) =
+          *reinterpret_cast<const uint32*>(src_argb + 4);
     }
   }
   src_argb += 8;
@@ -508,7 +511,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
       dst_argb[2] = BLENDER(fr, br, a);
       dst_argb[3] = 255u;
     } else {
-      *(uint32*)dst_argb = *(uint32*)src_argb;
+      *reinterpret_cast<uint32*>(dst_argb) =
+          *reinterpret_cast<const uint32*>(src_argb);
     }
   }
 }
diff --git a/source/row_neon.cc b/source/row_neon.cc
index bf4742cbc..2c68492e3 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -61,7 +61,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width) {
-  asm volatile(
+  asm volatile (
     "vld1.u8    {d24}, [%5]                  \n"
     "vld1.u8    {d25}, [%6]                  \n"
     "vmov.u8    d26, #128                    \n"
@@ -93,7 +93,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width) {
-  asm volatile(
+  asm volatile (
     "vld1.u8    {d24}, [%5]                  \n"
     "vld1.u8    {d25}, [%6]                  \n"
     "vmov.u8    d26, #128                    \n"
@@ -126,7 +126,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width) {
-  asm volatile(
+  asm volatile (
     "vld1.u8    {d24}, [%5]                  \n"
     "vld1.u8    {d25}, [%6]                  \n"
     "vmov.u8    d26, #128                    \n"
@@ -157,7 +157,7 @@ YUVTORGB
 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
 // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
 void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile(
+  asm volatile (
   "1:                                        \n"
\n" // load 16 pairs of UV "subs %3, %3, #16 \n" // 16 processed per loop @@ -177,7 +177,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { #ifdef HAS_COPYROW_NEON // Copy multiple of 64 void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile( + asm volatile ( "1: \n" "pld [%0, #0xC0] \n" // preload "vldm %0!,{q0,q1,q2,q3} \n" // load 64 @@ -195,7 +195,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { #ifdef HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile( + asm volatile ( // compute where to start writing destination "add %1, %2 \n" // work on segments that are multiples of 16 @@ -270,7 +270,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROWUV_NEON void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { - asm volatile( + asm volatile ( // compute where to start writing destination "add %1, %3 \n" // dst_a + width "add %2, %3 \n" // dst_b + width diff --git a/source/scale.cc b/source/scale.cc index 95221c2ec..315ae0ffc 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -59,7 +59,7 @@ void SetUseReferenceImpl(bool use) { #define HAS_SCALEROWDOWN2_NEON void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst, int dst_width) { - asm volatile( + asm volatile ( "1: \n" "vld2.u8 {q0,q1}, [%0]! \n" // load even pixels into q0, odd into q1 "vst1.u8 {q0}, [%1]! \n" // store even pixels @@ -75,7 +75,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst, int dst_width) { - asm volatile( + asm volatile ( "add %1, %0 \n" // change the stride to row 2 pointer "1: \n" "vld1.u8 {q0,q1}, [%0]! \n" // load row 1 and post increment @@ -101,7 +101,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_NEON static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "1: \n" "vld2.u8 {d0, d1}, [%0]! \n" "vtrn.u8 d1, d0 \n" @@ -120,7 +120,7 @@ static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "add r4, %0, %3 \n" "add r5, r4, %3 \n" "add %3, r5, %3 \n" @@ -159,7 +159,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, // Point samples 32 pixels to 24 pixels. static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "1: \n" "vld4.u8 {d0, d1, d2, d3}, [%0]! 
\n" // src line 0 "vmov d2, d3 \n" // order needs to be d0, d1, d2 @@ -176,7 +176,7 @@ static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -231,7 +231,7 @@ static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -283,7 +283,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = // 32 -> 12 static void ScaleRowDown38_NEON(const uint8* src_ptr, int, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vld1.u8 {q3}, [%3] \n" "1: \n" "vld1.u8 {d0, d1, d2, d3}, [%0]! \n" @@ -304,7 +304,7 @@ static void ScaleRowDown38_NEON(const uint8* src_ptr, int, // 32x3 -> 12x1 static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "vld1.u8 {q15}, [%6] \n" @@ -413,7 +413,7 @@ static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, // 32x2 -> 12x1 static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "add %3, %0 \n" @@ -508,7 +508,7 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleFilterRows_NEON(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { - asm volatile( + asm volatile ( "cmp %4, #0 \n" "beq 2f \n" "add %2, %1 \n" @@ -1555,7 +1555,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr, #define HAS_SCALEROWDOWN2_SSE2 static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1579,7 +1579,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1614,7 +1614,7 @@ static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_SSE2 static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrld $0x18,%%xmm5 \n" "1: \n" @@ -1640,7 +1640,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { intptr_t temp = 0; - asm volatile( + asm volatile ( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0x8,%%xmm7 \n" "lea (%4,%4,2),%3 \n" @@ -1693,7 +1693,7 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN8_SSE2 static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlq $0x38,%%xmm5 \n" "1: \n" @@ -1722,7 +1722,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, uint16* dst_ptr, int src_width, int 
                               uint16* dst_ptr, int src_width, int src_height) {
   int tmp_height = 0;
   intptr_t tmp_src = 0;
-  asm volatile(
+  asm volatile (
   "pxor       %%xmm4,%%xmm4                  \n"
   "sub        $0x1,%5                        \n"
 "1:                                          \n"
@@ -2263,7 +2263,7 @@ extern "C" void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 #elif defined(__x86_64__)
 static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "lea        (%3,%3,2),%%r10                \n"
   "pxor       %%xmm7,%%xmm7                  \n"
 "1:"
@@ -2322,7 +2322,7 @@ static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN34_SSSE3
 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%3),%%xmm3                    \n"
   "movdqa     (%4),%%xmm4                    \n"
   "movdqa     (%5),%%xmm5                    \n"
@@ -2353,7 +2353,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm2                    \n"  // _shuf01
   "movdqa     (%5),%%xmm3                    \n"  // _shuf11
   "movdqa     (%6),%%xmm4                    \n"  // _shuf21
@@ -2410,7 +2410,7 @@ static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm2                    \n"  // _shuf01
   "movdqa     (%5),%%xmm3                    \n"  // _shuf11
   "movdqa     (%6),%%xmm4                    \n"  // _shuf21
@@ -2471,7 +2471,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN38_SSSE3
 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%3),%%xmm4                    \n"
   "movdqa     (%4),%%xmm5                    \n"
 "1:"
@@ -2498,7 +2498,7 @@ static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm4                    \n"
   "movdqa     (%5),%%xmm5                    \n"
   "movdqa     (%6),%%xmm6                    \n"
@@ -2555,7 +2555,7 @@ static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm4                    \n"
   "movdqa     (%5),%%xmm5                    \n"
   "movdqa     (%6),%%xmm6                    \n"
@@ -2597,7 +2597,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
                                  const uint8* src_ptr, int src_stride,
                                  int dst_width, int source_y_fraction) {
   if (source_y_fraction == 0) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "lea        0x10(%1),%1                 \n"
@@ -2615,7 +2615,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
   );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "movdqa     (%1,%3,1),%%xmm2            \n"
@@ -2635,7 +2635,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
   );
     return;
   } else {
-    asm volatile(
+    asm volatile (
      "mov        %3,%%eax                    \n"
      "movd       %%eax,%%xmm6                \n"
      "punpcklwd  %%xmm6,%%xmm6               \n"
@@ -2688,7 +2688,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
                                   const uint8* src_ptr, int src_stride,
                                   int dst_width, int source_y_fraction) {
   if (source_y_fraction <= 1) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "lea        0x10(%1),%1                 \n"
@@ -2706,7 +2706,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
   );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "movdqa     (%1,%3,1),%%xmm2            \n"
@@ -2726,7 +2726,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
   );
     return;
   } else {
-    asm volatile(
+    asm volatile (
      "mov        %3,%%eax                    \n"
      "shr        %%eax                       \n"
      "mov        %%al,%%ah                   \n"