From 5b22506b14367f38c1e31d1aab4dc1132158737f Mon Sep 17 00:00:00 2001
From: fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90>
Date: Thu, 29 Mar 2012 02:19:26 +0000
Subject: [PATCH] With an asm()-aware lint, this cleans up most remaining
 issues.

BUG=none
TEST=lint filename
Review URL: https://webrtc-codereview.appspot.com/464001
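
For reference, the recurring patterns being fixed, shown on a hypothetical
helper (illustrative sketch only; ExampleClip and ExampleNop are invented
names, not code from this change):

  typedef long long example_int64;  // NOLINT - lint flags 'long long'
  #define EXAMPLE_ADD(a, b) ((a) + (b))  // spaces after macro-arg commas
  static inline unsigned int ExampleClip(int val) {
    return static_cast<unsigned int>(val < 0 ? 0 : val);  // no C-style cast
  }
  static inline void ExampleNop() {
    asm volatile (  // space between 'volatile' and '(' satisfies lint
      "nop                                     \n"
      ::: "memory");
  }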

git-svn-id: http://libyuv.googlecode.com/svn/trunk@230 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium                    |  2 +-
 include/libyuv/basic_types.h       | 14 +++----
 include/libyuv/compare.h           |  2 +-
 include/libyuv/convert.h           |  2 +-
 include/libyuv/convert_from.h      |  2 +-
 include/libyuv/format_conversion.h | 12 +++---
 include/libyuv/version.h           |  2 +-
 source/compare.cc                  |  6 +--
 source/convert.cc                  |  4 +-
 source/convert_from.cc             |  4 +-
 source/cpu_id.cc                   |  4 +-
 source/format_conversion.cc        |  2 +-
 source/planar_functions.cc         |  6 +--
 source/rotate.cc                   |  6 +--
 source/rotate_neon.cc              |  4 +-
 source/row.h                       |  2 +-
 source/row_common.cc               | 28 ++++++++------
 source/row_neon.cc                 | 14 +++---
 source/scale.cc                    | 60 +++++++++++++++---------------
 19 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/README.chromium b/README.chromium
index 207046502..f621b8893 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 229
+Version: 230
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/basic_types.h b/include/libyuv/basic_types.h
index 54c5f8a55..8c312b913 100644
--- a/include/libyuv/basic_types.h
+++ b/include/libyuv/basic_types.h
@@ -31,8 +31,8 @@ typedef __int64 int64;
 #define INT64_F "I64"
 #else  // COMPILER_MSVC
 #ifdef __LP64__
-typedef unsigned long uint64;
-typedef long int64;
+typedef unsigned long uint64;  // NOLINT
+typedef long int64;  // NOLINT
 #ifndef INT64_C
 #define INT64_C(x) x ## L
 #endif
@@ -41,8 +41,8 @@ typedef long int64;
 #endif
 #define INT64_F "l"
 #else  // __LP64__
-typedef unsigned long long uint64;
-typedef long long int64;
+typedef unsigned long long uint64;  // NOLINT
+typedef long long int64;  // NOLINT
 #ifndef INT64_C
 #define INT64_C(x) x ## LL
 #endif
@@ -54,8 +54,8 @@ typedef long long int64;
 #endif  // COMPILER_MSVC
 typedef unsigned int uint32;
 typedef int int32;
-typedef unsigned short uint16;
-typedef short int16;
+typedef unsigned short uint16;  // NOLINT
+typedef short int16;  // NOLINT
 typedef unsigned char uint8;
 typedef signed char int8;
 #endif  // INT_TYPES_DEFINED
@@ -70,4 +70,4 @@ typedef signed char int8;
 (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
     ((t)-1)) & ~((t)-1))))
 
-#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
+#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_
diff --git a/include/libyuv/compare.h b/include/libyuv/compare.h
index 3d31ecdde..1f78d2a9c 100644
--- a/include/libyuv/compare.h
+++ b/include/libyuv/compare.h
@@ -62,4 +62,4 @@ double I420Ssim(const uint8* src_y_a, int stride_y_a,
 }  // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_COMPARE_H_
+#endif  // INCLUDE_LIBYUV_COMPARE_H_
diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h
index a0d1cc032..ee114f079 100644
--- a/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@@ -215,4 +215,4 @@ int ConvertToI420(const uint8* src_frame, size_t src_size,
 }  // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_CONVERT_H_
+#endif  // INCLUDE_LIBYUV_CONVERT_H_
diff --git a/include/libyuv/convert_from.h b/include/libyuv/convert_from.h
index a50c931ed..34a1571ff 100644
--- a/include/libyuv/convert_from.h
+++ b/include/libyuv/convert_from.h
@@ -139,4 +139,4 @@ int ConvertFromI420(const uint8* y, int y_stride,
 }  // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_H_
diff --git a/include/libyuv/format_conversion.h b/include/libyuv/format_conversion.h
index 67e14dc7a..44e0f67fe 100644
--- a/include/libyuv/format_conversion.h
+++ b/include/libyuv/format_conversion.h
@@ -44,8 +44,8 @@ int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
                     int width, int height);
 
 // Temporary API mapper
-#define BayerRGBToI420(b,bs,f,y,ys,u,us,v,vs,w,h) \
-    BayerToI420(b,bs,y,ys,u,us,v,vs,w,h,f)
+#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
+    BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
 
 int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
                 uint8* dst_y, int dst_stride_y,
@@ -80,8 +80,8 @@ int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
                     int width, int height);
 
 // Temporary API mapper
-#define I420ToBayerRGB(y,ys,u,us,v,vs,b,bs,f,w,h) \
-    I420ToBayer(y,ys,u,us,v,vs,b,bs,w,h,f)
+#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
+    I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
 
 int I420ToBayer(const uint8* src_y, int src_stride_y,
                 const uint8* src_u, int src_stride_u,
@@ -108,7 +108,7 @@ int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
                     int width, int height);
 
 // Temporary API mapper
-#define BayerRGBToARGB(b,bs,f,a,as,w,h) BayerToARGB(b,bs,a,as,w,h,f)
+#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
 
 int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
                 uint8* dst_argb, int dst_stride_argb,
@@ -133,7 +133,7 @@ int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
                     int width, int height);
 
 // Temporary API mapper
-#define ARGBToBayerRGB(a,as,b,bs,f,w,h) ARGBToBayer(b,bs,a,as,w,h,f)
+#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
 
 int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_bayer, int dst_stride_bayer,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 58f9280b9..081edbd03 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define INCLUDE_LIBYUV_VERSION 229
+#define INCLUDE_LIBYUV_VERSION 230
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/compare.cc b/source/compare.cc
index 998641cf6..b1b88769f 100644
--- a/source/compare.cc
+++ b/source/compare.cc
@@ -42,7 +42,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
 static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
                                   int count) {
   volatile uint32 sse;
-  asm volatile(
+  asm volatile (
     "vmov.u8    q7, #0                       \n"
     "vmov.u8    q9, #0                       \n"
     "vmov.u8    q8, #0                       \n"
@@ -116,12 +116,12 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
     }
   }
 
-#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
 static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
                                   int count) {
   uint32 sse;
-  asm volatile(
+  asm volatile (
   "pxor       %%xmm0,%%xmm0                  \n"
   "pxor       %%xmm5,%%xmm5                  \n"
   "sub        %0,%1                          \n"
diff --git a/source/convert.cc b/source/convert.cc
index 5ad3e3222..b213ed0b3 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -90,7 +90,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
 #define HAS_HALFROW_SSE2
 static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
                          uint8* dst_uv, int pix) {
-  asm volatile(
+  asm volatile (
   "sub        %0,%1                          \n"
 "1:                                          \n"
   "movdqa     (%0),%%xmm0                    \n"
@@ -502,7 +502,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2,
 #define HAS_SPLITYUY2_SSE2
 static void SplitYUY2_SSE2(const uint8* src_yuy2,
                            uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile(
+  asm volatile (
   "pcmpeqb    %%xmm5,%%xmm5                  \n"
   "psrlw      $0x8,%%xmm5                    \n"
 "1:                                          \n"
diff --git a/source/convert_from.cc b/source/convert_from.cc
index bf46b0acf..dc51a4fe7 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -289,7 +289,7 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
                                const uint8* src_u,
                                const uint8* src_v,
                                uint8* dst_frame, int width) {
-  asm volatile(
+  asm volatile (
   "sub        %1,%2                          \n"
 "1:                                          \n"
   "movq       (%1),%%xmm2                    \n"
@@ -324,7 +324,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y,
                                const uint8* src_u,
                                const uint8* src_v,
                                uint8* dst_frame, int width) {
-  asm volatile(
+  asm volatile (
   "sub        %1,%2                          \n"
 "1:                                          \n"
   "movq       (%1),%%xmm2                    \n"
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 18a8572e1..ab8a2c261 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -24,7 +24,7 @@
 // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-  asm volatile(
+  asm volatile (
     "mov %%ebx, %%edi                        \n"
     "cpuid                                   \n"
     "xchg %%edi, %%ebx                       \n"
@@ -33,7 +33,7 @@ static __inline void __cpuid(int cpu_info[4], int info_type) {
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static __inline void __cpuid(int cpu_info[4], int info_type) {
-  asm volatile(
+  asm volatile (
     "cpuid                                   \n"
     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     : "a"(info_type));
diff --git a/source/format_conversion.cc b/source/format_conversion.cc
index 692932bee..7dfbfdb84 100644
--- a/source/format_conversion.cc
+++ b/source/format_conversion.cc
@@ -53,7 +53,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
 #define HAS_ARGBTOBAYERROW_SSSE3
 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                                  uint32 selector, int pix) {
-  asm volatile(
+  asm volatile (
   "movd       %3,%%xmm5                      \n"
   "pshufd     $0x0,%%xmm5,%%xmm5             \n"
 "1:                                          \n"
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index a24bcdacb..f5b834a8b 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -654,7 +654,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SETROW_NEON
 static void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
-  asm volatile(
+  asm volatile (
     "vdup.u32   q0, %2                       \n"  // duplicate 4 ints
   "1:                                        \n"
     "subs       %1, %1, #16                  \n"  // 16 bytes per loop
@@ -723,7 +723,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
 #define HAS_SETROW_X86
 static void SetRow8_X86(uint8* dst, uint32 v32, int width) {
   size_t width_tmp = static_cast<size_t>(width);
-  asm volatile(
+  asm volatile (
     "shr        $0x2,%1                      \n"
     "rep stosl                               \n"
   : "+D"(dst),       // %0
@@ -737,7 +737,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
   for (int y = 0; y < height; ++y) {
     size_t width_tmp = static_cast<size_t>(width);
     uint32* d = reinterpret_cast<uint32*>(dst);
-    asm volatile(
+    asm volatile (
       "rep stosl                             \n"
     : "+D"(d),         // %0
       "+c"(width_tmp)  // %1
diff --git a/source/rotate.cc b/source/rotate.cc
index 4a58689c8..4b11683c2 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -295,7 +295,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_SSSE3
 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                                uint8* dst, int dst_stride, int width) {
-  asm volatile(
+  asm volatile (
     // Read in the data from the source pointer.
     // First round of bit swap.
   "1:                                          \n"
@@ -506,7 +506,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 #define HAS_TRANSPOSE_WX8_FAST_SSSE3
 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
                                     uint8* dst, int dst_stride, int width) {
-  asm volatile(
+  asm volatile (
     // Read in the data from the source pointer.
     // First round of bit swap.
   "1:                                          \n"
@@ -646,7 +646,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                 uint8* dst_a, int dst_stride_a,
                                 uint8* dst_b, int dst_stride_b,
                                 int w) {
-  asm volatile(
+  asm volatile (
     // Read in the data from the source pointer.
     // First round of bit swap.
   "1:                                          \n"
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index f99c72e77..0240fe12f 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -25,7 +25,7 @@ static const uvec8 vtbl_4x4_transpose =
 void TransposeWx8_NEON(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride,
                        int width) {
-  asm volatile(
+  asm volatile (
     // loops are on blocks of 8. loop will stop when
     // counter gets to or below 0. starting the counter
     // at w-8 allow for this
@@ -191,7 +191,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width) {
-  asm volatile(
+  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
    // at w-8 allow for this
diff --git a/source/row.h b/source/row.h
index 8c348dccc..32eefc308 100644
--- a/source/row.h
+++ b/source/row.h
@@ -87,7 +87,7 @@ extern "C" {
 typedef __declspec(align(16)) int8 vec8[16];
 typedef __declspec(align(16)) uint8 uvec8[16];
 typedef __declspec(align(16)) int16 vec16[8];
-#else // __GNUC__
+#else  // __GNUC__
 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
 typedef int8 __attribute__((vector_size(16))) vec8;
 typedef uint8 __attribute__((vector_size(16))) uvec8;
diff --git a/source/row_common.cc b/source/row_common.cc
index 9b563f6bf..bf4bc15fe 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -10,9 +10,10 @@
 
 #include "source/row.h"
 
-#include "libyuv/basic_types.h"
 #include <string.h>  // For memcpy
 
+#include "libyuv/basic_types.h"
+
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
@@ -195,7 +196,7 @@ static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
   return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
 }
 
-#define MAKEROWY(NAME,R,G,B) \
+#define MAKEROWY(NAME, R, G, B) \
 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {     \
   for (int x = 0; x < width; ++x) {                                          \
     dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);             \
@@ -229,9 +230,9 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,            \
   }                                                                          \
 }
 
-MAKEROWY(ARGB,2,1,0)
-MAKEROWY(BGRA,1,2,3)
-MAKEROWY(ABGR,0,1,2)
+MAKEROWY(ARGB, 2, 1, 0)
+MAKEROWY(BGRA, 1, 2, 3)
+MAKEROWY(ABGR, 0, 1, 2)
 
 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
   // Copy a Y to RGB.
@@ -263,11 +264,11 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
 
 static __inline uint32 Clip(int32 val) {
   if (val < 0) {
-    return (uint32) 0;
-  } else if (val > 255){
-    return (uint32) 255;
+    return static_cast<uint32>(0);
+  } else if (val > 255) {
+    return static_cast<uint32>(255);
   }
-  return (uint32) val;
+  return static_cast<uint32>(val);
 }
 
 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
@@ -469,7 +470,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
       dst_argb[2] = BLENDER(fr, br, a);
       dst_argb[3] = 255u;
     } else {
-      *(uint32*)dst_argb = *(uint32*)src_argb;
+      *reinterpret_cast<uint32*>(dst_argb) =
+          *reinterpret_cast<const uint32*>(src_argb);
     }
   }
   a = src_argb[4 + 3];
@@ -486,7 +488,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
       dst_argb[4 + 2] = BLENDER(fr, br, a);
       dst_argb[4 + 3] = 255u;
     } else {
-      *(uint32*)(dst_argb + 4) = *(uint32*)(src_argb + 4);
+      *reinterpret_cast<uint32*>(dst_argb + 4) =
+          *reinterpret_cast<const uint32*>(src_argb + 4);
     }
   }
   src_argb += 8;
@@ -508,7 +511,8 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
       dst_argb[2] = BLENDER(fr, br, a);
       dst_argb[3] = 255u;
     } else {
-      *(uint32*)dst_argb = *(uint32*)src_argb;
+      *reinterpret_cast<uint32*>(dst_argb) =
+          *reinterpret_cast<const uint32*>(src_argb);
     }
   }
 }
diff --git a/source/row_neon.cc b/source/row_neon.cc
index bf4742cbc..2c68492e3 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -61,7 +61,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width) {
-  asm volatile(
+  asm volatile (
     "vld1.u8    {d24}, [%5]                  \n"
     "vld1.u8    {d25}, [%6]                  \n"
     "vmov.u8    d26, #128                    \n"
@@ -93,7 +93,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width) {
-  asm volatile(
+  asm volatile (
     "vld1.u8    {d24}, [%5]                  \n"
     "vld1.u8    {d25}, [%6]                  \n"
     "vmov.u8    d26, #128                    \n"
@@ -126,7 +126,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width) {
-  asm volatile(
+  asm volatile (
     "vld1.u8    {d24}, [%5]                  \n"
     "vld1.u8    {d25}, [%6]                  \n"
     "vmov.u8    d26, #128                    \n"
@@ -157,7 +157,7 @@ YUVTORGB
 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
 // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
 void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile(
+  asm volatile (
   "1:                                        \n"
\n" // load 16 pairs of UV "subs %3, %3, #16 \n" // 16 processed per loop @@ -177,7 +177,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { #ifdef HAS_COPYROW_NEON // Copy multiple of 64 void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile( + asm volatile ( "1: \n" "pld [%0, #0xC0] \n" // preload "vldm %0!,{q0,q1,q2,q3} \n" // load 64 @@ -195,7 +195,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { #ifdef HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile( + asm volatile ( // compute where to start writing destination "add %1, %2 \n" // work on segments that are multiples of 16 @@ -270,7 +270,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROWUV_NEON void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { - asm volatile( + asm volatile ( // compute where to start writing destination "add %1, %3 \n" // dst_a + width "add %2, %3 \n" // dst_b + width diff --git a/source/scale.cc b/source/scale.cc index 95221c2ec..315ae0ffc 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -59,7 +59,7 @@ void SetUseReferenceImpl(bool use) { #define HAS_SCALEROWDOWN2_NEON void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst, int dst_width) { - asm volatile( + asm volatile ( "1: \n" "vld2.u8 {q0,q1}, [%0]! \n" // load even pixels into q0, odd into q1 "vst1.u8 {q0}, [%1]! \n" // store even pixels @@ -75,7 +75,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst, int dst_width) { - asm volatile( + asm volatile ( "add %1, %0 \n" // change the stride to row 2 pointer "1: \n" "vld1.u8 {q0,q1}, [%0]! \n" // load row 1 and post increment @@ -101,7 +101,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_NEON static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "1: \n" "vld2.u8 {d0, d1}, [%0]! \n" "vtrn.u8 d1, d0 \n" @@ -120,7 +120,7 @@ static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "add r4, %0, %3 \n" "add r5, r4, %3 \n" "add %3, r5, %3 \n" @@ -159,7 +159,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, // Point samples 32 pixels to 24 pixels. static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "1: \n" "vld4.u8 {d0, d1, d2, d3}, [%0]! 
\n" // src line 0 "vmov d2, d3 \n" // order needs to be d0, d1, d2 @@ -176,7 +176,7 @@ static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -231,7 +231,7 @@ static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -283,7 +283,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = // 32 -> 12 static void ScaleRowDown38_NEON(const uint8* src_ptr, int, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vld1.u8 {q3}, [%3] \n" "1: \n" "vld1.u8 {d0, d1, d2, d3}, [%0]! \n" @@ -304,7 +304,7 @@ static void ScaleRowDown38_NEON(const uint8* src_ptr, int, // 32x3 -> 12x1 static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "vld1.u8 {q15}, [%6] \n" @@ -413,7 +413,7 @@ static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, // 32x2 -> 12x1 static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "add %3, %0 \n" @@ -508,7 +508,7 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleFilterRows_NEON(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { - asm volatile( + asm volatile ( "cmp %4, #0 \n" "beq 2f \n" "add %2, %1 \n" @@ -1555,7 +1555,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr, #define HAS_SCALEROWDOWN2_SSE2 static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1579,7 +1579,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1614,7 +1614,7 @@ static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_SSE2 static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrld $0x18,%%xmm5 \n" "1: \n" @@ -1640,7 +1640,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { intptr_t temp = 0; - asm volatile( + asm volatile ( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0x8,%%xmm7 \n" "lea (%4,%4,2),%3 \n" @@ -1693,7 +1693,7 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN8_SSE2 static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile( + asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlq $0x38,%%xmm5 \n" "1: \n" @@ -1722,7 +1722,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, uint16* dst_ptr, int src_width, int 
                               uint16* dst_ptr, int src_width, int src_height) {
   int tmp_height = 0;
   intptr_t tmp_src = 0;
-  asm volatile(
+  asm volatile (
   "pxor       %%xmm4,%%xmm4                  \n"
   "sub        $0x1,%5                        \n"
 "1:                                          \n"
@@ -2263,7 +2263,7 @@ extern "C" void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 #elif defined(__x86_64__)
 static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "lea        (%3,%3,2),%%r10                \n"
   "pxor       %%xmm7,%%xmm7                  \n"
 "1:"
@@ -2322,7 +2322,7 @@ static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN34_SSSE3
 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%3),%%xmm3                    \n"
   "movdqa     (%4),%%xmm4                    \n"
   "movdqa     (%5),%%xmm5                    \n"
@@ -2353,7 +2353,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm2                    \n"  // _shuf01
   "movdqa     (%5),%%xmm3                    \n"  // _shuf11
   "movdqa     (%6),%%xmm4                    \n"  // _shuf21
@@ -2410,7 +2410,7 @@ static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm2                    \n"  // _shuf01
   "movdqa     (%5),%%xmm3                    \n"  // _shuf11
   "movdqa     (%6),%%xmm4                    \n"  // _shuf21
@@ -2471,7 +2471,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride,
 #define HAS_SCALEROWDOWN38_SSSE3
 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%3),%%xmm4                    \n"
   "movdqa     (%4),%%xmm5                    \n"
 "1:"
@@ -2498,7 +2498,7 @@ static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm4                    \n"
   "movdqa     (%5),%%xmm5                    \n"
   "movdqa     (%6),%%xmm6                    \n"
@@ -2555,7 +2555,7 @@ static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride,
 
 static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
                                        uint8* dst_ptr, int dst_width) {
-  asm volatile(
+  asm volatile (
   "movdqa     (%4),%%xmm4                    \n"
   "movdqa     (%5),%%xmm5                    \n"
   "movdqa     (%6),%%xmm6                    \n"
@@ -2597,7 +2597,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
                                  const uint8* src_ptr, int src_stride,
                                  int dst_width, int source_y_fraction) {
   if (source_y_fraction == 0) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "lea        0x10(%1),%1                 \n"
@@ -2615,7 +2615,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
   );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "movdqa     (%1,%3,1),%%xmm2            \n"
@@ -2635,7 +2635,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
   );
     return;
   } else {
-    asm volatile(
+    asm volatile (
      "mov        %3,%%eax                    \n"
      "movd       %%eax,%%xmm6                \n"
      "punpcklwd  %%xmm6,%%xmm6               \n"
@@ -2688,7 +2688,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
                                   const uint8* src_ptr, int src_stride,
                                   int dst_width, int source_y_fraction) {
   if (source_y_fraction <= 1) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "lea        0x10(%1),%1                 \n"
@@ -2706,7 +2706,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
   );
     return;
   } else if (source_y_fraction == 128) {
-    asm volatile(
+    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                 \n"
      "movdqa     (%1,%3,1),%%xmm2            \n"
@@ -2726,7 +2726,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
   );
     return;
   } else {
-    asm volatile(
+    asm volatile (
      "mov        %3,%%eax                    \n"
      "shr        %%eax                       \n"
      "mov        %%al,%%ah                   \n"