From 2b9c21080362d7f1b45f7465847e939b1d270583 Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com"
Date: Thu, 22 Mar 2012 22:36:44 +0000
Subject: [PATCH] lint fix test

BUG=none
TEST=gcl lint
lintfix2

Review URL: https://webrtc-codereview.appspot.com/458003

git-svn-id: http://libyuv.googlecode.com/svn/trunk@220 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium               |   2 +-
 include/libyuv/version.h      |   2 +-
 include/libyuv/video_common.h |   7 ++-
 source/compare.cc             |   9 ++-
 source/convert.cc             |  72 ++++++++++-------------
 source/convert_from.cc        |   6 +-
 source/cpu_id.cc              |  10 ++--
 source/format_conversion.cc   |   4 +-
 source/planar_functions.cc    |   8 +--
 source/rotate.cc              |  10 ++--
 source/rotate_neon.cc         |   6 +-
 source/row_common.cc          |   2 +-
 source/row_neon.cc            |  16 ++---
 source/row_posix.cc           | 106 +++++++++++++++++-----------------
 source/row_win.cc             |   2 +-
 source/scale.cc               |  62 ++++++++++----------
 unit_test/compare_test.cc     |  11 ++--
 17 files changed, 161 insertions(+), 174 deletions(-)

diff --git a/README.chromium b/README.chromium
index fafe8d8f6..1b918932f 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 219
+Version: 220
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index bdbe8e75a..39bce659f 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 219
+#define LIBYUV_VERSION 220
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
 
diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h
index 33de6912d..8d1f96fd7 100644
--- a/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
@@ -25,6 +25,7 @@ extern "C" {
 //////////////////////////////////////////////////////////////////////////////
 // Definition of FourCC codes
 //////////////////////////////////////////////////////////////////////////////
+
 // Convert four characters to a FourCC code.
 // Needs to be a macro otherwise the OS X compiler complains when the kFormat*
 // constants are used in a switch.
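The hunk above only adds a blank line before the FOURCC comment block, but the comment it sits next to is worth unpacking: FOURCC() has to stay a macro rather than an inline function so that FOURCC_ARGB and friends remain integral constant expressions usable as switch-case labels. A minimal sketch of what such a packing macro looks like, assuming the first character lands in the low byte; the real definition in video_common.h may differ in casts and parenthesization:

    #include "libyuv/basic_types.h"  // uint32

    // Hypothetical sketch only; see video_common.h for the actual definition.
    // Packs four characters into a uint32, first character in the low byte.
    #define FOURCC(a, b, c, d)                                      \
      ((static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) |   \
       (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))

    // Because the expansion is an integral constant expression, values such
    // as FOURCC('A', 'R', 'G', 'B') can appear directly as case labels:
    //   switch (fourcc) {
    //     case FOURCC('A', 'R', 'G', 'B'): /* handle ARGB */ break;
    //   }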
@@ -57,9 +58,9 @@ enum FourCC { FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), - FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565 - FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555 - FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 + FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565 + FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555 + FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), diff --git a/source/compare.cc b/source/compare.cc index 1da3dc4ab..998641cf6 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -18,7 +18,7 @@ #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -42,7 +42,7 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { volatile uint32 sse; - asm volatile ( + asm volatile( "vmov.u8 q7, #0 \n" "vmov.u8 q9, #0 \n" "vmov.u8 q8, #0 \n" @@ -71,8 +71,7 @@ static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, "+r"(count), "=r"(sse) : - : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10" - ); + : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"); return sse; } @@ -122,7 +121,7 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { uint32 sse; - asm volatile ( + asm volatile( "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm5,%%xmm5 \n" "sub %0,%1 \n" diff --git a/source/convert.cc b/source/convert.cc index fab5b16b9..7a9cdf282 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -19,7 +19,7 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/video_common.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, movdqa xmm0, [eax] pavgb xmm0, [eax + edx] sub ecx, 16 - movdqa [eax + edi], xmm0 + movdqa [eax + edi], xmm0 // NOLINT lea eax, [eax + 16] jg convertloop pop edi @@ -86,11 +86,11 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) #define HAS_HALFROW_SSE2 static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, uint8* dst_uv, int pix) { - asm volatile ( + asm volatile( "sub %0,%1 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -137,7 +137,7 @@ int I422ToI420(const uint8* src_y, int src_stride_y, } int halfwidth = (width + 1) >> 1; void (*HalfRow)(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); + uint8* dst_uv, int pix) = HalfRow_C; #if defined(HAS_HALFROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && @@ -146,11 +146,8 @@ int I422ToI420(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { HalfRow = HalfRow_SSE2; - } else -#endif - { - HalfRow = HalfRow_C; } +#endif // Copy Y plane if (dst_y) { @@ -186,7 +183,7 @@ int I422ToI420(const uint8* src_y, int src_stride_y, #define HAS_SCALEROWDOWN2_NEON void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, 
uint8* dst, int dst_width); -#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ +#elif defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) && \ !defined(YUV_DISABLE_ASM) void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width); @@ -213,14 +210,13 @@ int I444ToI420(const uint8* src_y, int src_stride_y, } int halfwidth = (width + 1) >> 1; void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); + uint8* dst_ptr, int dst_width) = ScaleRowDown2Int_C; #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { ScaleRowDown2 = ScaleRowDown2Int_NEON; - } else -#endif -#if defined(HAS_SCALEROWDOWN2_SSE2) + } +#elif defined(HAS_SCALEROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && @@ -228,10 +224,8 @@ int I444ToI420(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { ScaleRowDown2 = ScaleRowDown2Int_SSE2; -#endif - { - ScaleRowDown2 = ScaleRowDown2Int_C; } +#endif // Copy Y plane if (dst_y) { @@ -395,11 +389,12 @@ static int X420ToI420(const uint8* src_y, } int halfwidth = (width + 1) >> 1; - void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); + void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = + SplitUV_C; #if defined(HAS_SPLITUV_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { SplitUV = SplitUV_NEON; - } else + } #elif defined(HAS_SPLITUV_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && @@ -407,11 +402,8 @@ static int X420ToI420(const uint8* src_y, IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { SplitUV = SplitUV_SSE2; - } else -#endif - { - SplitUV = SplitUV_C; } +#endif if (dst_y) { CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, @@ -483,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, pand xmm2, xmm5 // even bytes are Y pand xmm3, xmm5 packuswb xmm2, xmm3 - movdqa [edx], xmm2 + movdqa [edx], xmm2 // NOLINT lea edx, [edx + 16] psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 @@ -491,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - movq qword ptr [esi], xmm0 + movq qword ptr [esi], xmm0 // NOLINT lea esi, [esi + 8] psrlw xmm1, 8 // V packuswb xmm1, xmm1 sub ecx, 16 - movq qword ptr [edi], xmm1 + movq qword ptr [edi], xmm1 // NOLINT lea edi, [edi + 8] jg convertloop @@ -506,11 +498,11 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) #define HAS_SPLITYUY2_SSE2 static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -604,19 +596,17 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, } #endif - void (*SplitYUY2)(const uint8* src_yuy2, - uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix); + void (*SplitYUY2)(const uint8* src_yuy2, uint8* dst_y, uint8* dst_u, + uint8* dst_v, int pix) = SplitYUY2_C; #if defined(HAS_SPLITYUY2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && IS_ALIGNED(src_yuy2, 16) && 
IS_ALIGNED(src_stride_yuy2, 16) && IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { SplitYUY2 = SplitYUY2_SSE2; - } else -#endif - { - SplitYUY2 = SplitYUY2_C; } +#endif + for (int y = 0; y < height; y += 2) { CopyRow(src_y, dst_y, width); dst_y += dst_stride_y; @@ -800,13 +790,13 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, #endif #ifdef LIBYUV_LITTLE_ENDIAN -#define READWORD(p) (*((uint32*) (p))) +#define READWORD(p) (*reinterpret_cast(p)) #else static inline uint32 READWORD(const uint8* p) { - return (uint32) p[0] | - ((uint32) (p[1]) << 8) | - ((uint32) (p[2]) << 16) | - ((uint32) (p[3]) << 24); + return static_cast(p[0]) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24); } #endif @@ -1599,7 +1589,7 @@ int ConvertToI420(const uint8* sample, size_t sample_size, int tmp_y_stride = y_stride; int tmp_u_stride = u_stride; int tmp_v_stride = v_stride; - uint8* buf = 0; + uint8* buf = NULL; int abs_dst_height = (dst_height < 0) ? -dst_height : dst_height; if (need_rot) { int y_size = dst_width * abs_dst_height; @@ -1618,7 +1608,7 @@ int ConvertToI420(const uint8* sample, size_t sample_size, switch (format) { // Single plane formats case FOURCC_YUY2: - src = sample + (aligned_src_width * crop_y + crop_x) * 2 ; + src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = YUY2ToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, diff --git a/source/convert_from.cc b/source/convert_from.cc index 7e41e2f8b..069be7d7e 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -17,7 +17,7 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/video_common.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -289,7 +289,7 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_frame, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movq (%1),%%xmm2 \n" @@ -324,7 +324,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_frame, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movq (%1),%%xmm2 \n" diff --git a/source/cpu_id.cc b/source/cpu_id.cc index aafbecda8..18a8572e1 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -24,21 +24,19 @@ // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux. 
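A recurring change in the convert.cc hunks above is how the per-row worker is chosen: instead of an if/else ladder whose trailing else assigned the C fallback, the function pointer is now initialized to the C implementation and the guarded block simply overrides it when the CPU flag and alignment checks pass. The sketch below illustrates that idiom with a hypothetical wrapper; HalfPlane is invented for the example, while HalfRow_C, HalfRow_SSE2, HAS_HALFROW_SSE2, TestCpuFlag and IS_ALIGNED are the names that appear in the diff:

    #include "libyuv/basic_types.h"  // uint8
    #include "libyuv/cpu_id.h"       // TestCpuFlag, kCpuHasSSE2
    #include "source/row.h"          // IS_ALIGNED; row helpers assumed visible

    // Hypothetical wrapper showing the "default to C, upgrade to SIMD" pattern.
    void HalfPlane(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int dst_uv_stride,
                   int halfwidth, int halfheight) {
      void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
                      uint8* dst_uv, int pix) = HalfRow_C;  // always-correct default
    #if defined(HAS_HALFROW_SSE2)
      if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) &&
          IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_uv_stride, 16) &&
          IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_uv_stride, 16)) {
        HalfRow = HalfRow_SSE2;  // override only when every precondition holds
      }
    #endif
      for (int y = 0; y < halfheight; ++y) {
        HalfRow(src_uv, src_uv_stride, dst_uv, halfwidth);
        src_uv += src_uv_stride * 2;  // HalfRow consumes two source rows
        dst_uv += dst_uv_stride;
      }
    }

The same shape appears for ScaleRowDown2, SplitUV and SplitYUY2 later in the file; keeping the C function as the unconditional default means each #if block can be compiled out entirely without ever leaving the pointer uninitialized.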
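The same file's UYVY/YUY2 helpers read packed pixels one 32-bit word at a time through READWORD, which the patch converts from C-style to C++-style casts. The template arguments of those casts are not legible above, so the version below is a reconstruction under the assumption that the little-endian path reads through a const uint32* and the portable path assembles the word low byte first:

    #include "libyuv/basic_types.h"  // uint8, uint32

    #ifdef LIBYUV_LITTLE_ENDIAN
    // Little-endian fast path: the four bytes already form the word in memory.
    #define READWORD(p) (*reinterpret_cast<const uint32*>(p))
    #else
    // Portable fallback: build the word explicitly, low byte first.
    static inline uint32 READWORD(const uint8* p) {
      return static_cast<uint32>(p[0]) |
             (static_cast<uint32>(p[1]) << 8) |
             (static_cast<uint32>(p[2]) << 16) |
             (static_cast<uint32>(p[3]) << 24);
    }
    #endif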
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) static __inline void __cpuid(int cpu_info[4], int info_type) { - asm volatile ( + asm volatile( "mov %%ebx, %%edi \n" "cpuid \n" "xchg %%edi, %%ebx \n" : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) - : "a"(info_type) - ); + : "a"(info_type)); } #elif defined(__i386__) || defined(__x86_64__) static __inline void __cpuid(int cpu_info[4], int info_type) { - asm volatile ( + asm volatile( "cpuid \n" : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) - : "a"(info_type) - ); + : "a"(info_type)); } #endif diff --git a/source/format_conversion.cc b/source/format_conversion.cc index 5cedf2a8e..692932bee 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -13,7 +13,7 @@ #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" #include "libyuv/video_common.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -53,7 +53,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb, #define HAS_ARGBTOBAYERROW_SSSE3 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) { - asm volatile ( + asm volatile( "movd %3,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "1: \n" diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 07e6173c7..a6f7b66bc 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -13,7 +13,7 @@ #include // for memset() #include "libyuv/cpu_id.h" -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -693,7 +693,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #define HAS_SETROW_NEON static void SetRow8_NEON(uint8* dst, uint32 v32, int count) { - asm volatile ( + asm volatile( "vdup.u32 q0, %2 \n" // duplicate 4 ints "1: \n" "subs %1, %1, #16 \n" // 16 bytes per loop @@ -763,7 +763,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, #define HAS_SETROW_X86 static void SetRow8_X86(uint8* dst, uint32 v32, int width) { size_t width_tmp = static_cast(width); - asm volatile ( + asm volatile( "shr $0x2,%1 \n" "rep stosl \n" : "+D"(dst), // %0 @@ -778,7 +778,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, for (int y = 0; y < height; ++y) { size_t width_tmp = static_cast(width); uint32* d = reinterpret_cast(dst); - asm volatile ( + asm volatile( "rep stosl \n" : "+D"(d), // %0 "+c"(width_tmp) // %1 diff --git a/source/rotate.cc b/source/rotate.cc index d62c36a7c..01c3ffcda 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -13,8 +13,8 @@ #include "libyuv/cpu_id.h" #include "libyuv/convert.h" #include "libyuv/planar_functions.h" -#include "rotate_priv.h" -#include "row.h" +#include "source/rotate_priv.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -295,7 +295,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, #define HAS_TRANSPOSE_WX8_SSSE3 static void TransposeWx8_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { - asm volatile ( + asm volatile( // Read in the data from the source pointer. // First round of bit swap. "1: \n" @@ -506,7 +506,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, #define HAS_TRANSPOSE_WX8_FAST_SSSE3 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { - asm volatile ( + asm volatile( // Read in the data from the source pointer. 
// First round of bit swap. "1: \n" @@ -646,7 +646,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int w) { - asm volatile ( + asm volatile( // Read in the data from the source pointer. // First round of bit swap. "1: \n" diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc index 0f01f02b2..f99c72e77 100644 --- a/source/rotate_neon.cc +++ b/source/rotate_neon.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #include "libyuv/basic_types.h" @@ -25,7 +25,7 @@ static const uvec8 vtbl_4x4_transpose = void TransposeWx8_NEON(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { - asm volatile ( + asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this @@ -191,7 +191,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width) { - asm volatile ( + asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this diff --git a/source/row_common.cc b/source/row_common.cc index 8b14b43d2..9b563f6bf 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #include "libyuv/basic_types.h" #include // For memcpy diff --git a/source/row_neon.cc b/source/row_neon.cc index 3ebebc113..bf4742cbc 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -61,7 +61,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "vld1.u8 {d24}, [%5] \n" "vld1.u8 {d25}, [%6] \n" "vmov.u8 d26, #128 \n" @@ -93,7 +93,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "vld1.u8 {d24}, [%5] \n" "vld1.u8 {d25}, [%6] \n" "vmov.u8 d26, #128 \n" @@ -126,7 +126,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "vld1.u8 {d24}, [%5] \n" "vld1.u8 {d25}, [%6] \n" "vmov.u8 d26, #128 \n" @@ -157,7 +157,7 @@ YUVTORGB // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "1: \n" "vld2.u8 {q0,q1}, [%0]! 
\n" // load 16 pairs of UV "subs %3, %3, #16 \n" // 16 processed per loop @@ -177,7 +177,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { #ifdef HAS_COPYROW_NEON // Copy multiple of 64 void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile ( + asm volatile( "1: \n" "pld [%0, #0xC0] \n" // preload "vldm %0!,{q0,q1,q2,q3} \n" // load 64 @@ -195,7 +195,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { #ifdef HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( + asm volatile( // compute where to start writing destination "add %1, %2 \n" // work on segments that are multiples of 16 @@ -270,7 +270,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROWUV_NEON void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { - asm volatile ( + asm volatile( // compute where to start writing destination "add %1, %3 \n" // dst_a + width "add %2, %3 \n" // dst_b + width diff --git a/source/row_posix.cc b/source/row_posix.cc index 92f090385..1d0d1cf1b 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #include "libyuv/basic_types.h" @@ -109,7 +109,7 @@ CONST uvec8 kShuffleMaskARGBToRAW = { }; void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0x18,%%xmm5 \n" "1: \n" @@ -138,7 +138,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { } void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm5 \n" "sub %0,%1 \n" "1: \n" @@ -161,7 +161,7 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) { } void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm5 \n" "sub %0,%1 \n" "1: \n" @@ -183,7 +183,7 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { } void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 "pslld $0x18,%%xmm5 \n" "movdqa %3,%%xmm4 \n" @@ -223,7 +223,7 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { } void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 "pslld $0x18,%%xmm5 \n" "movdqa %3,%%xmm4 \n" @@ -263,7 +263,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { } void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "mov $0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" @@ -312,7 +312,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "mov $0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" @@ -364,7 +364,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "mov $0xf0f0f0f,%%eax \n" "movd %%eax,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" @@ -403,7 +403,7 @@ void 
ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm6 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -443,7 +443,7 @@ void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { } void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "movdqa %3,%%xmm6 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -483,7 +483,7 @@ void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { } void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm3,%%xmm3 \n" "psrld $0x1b,%%xmm3 \n" "pcmpeqb %%xmm4,%%xmm4 \n" @@ -522,7 +522,7 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psrld $0x1b,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" @@ -565,7 +565,7 @@ void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psllw $0xc,%%xmm4 \n" "movdqa %%xmm4,%%xmm3 \n" @@ -596,7 +596,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { } void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -632,7 +632,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { } void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -674,7 +674,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { // and considered unsafe. 
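Every change in the row_posix.cc hunks above is the same whitespace-only adjustment to asm volatile, but it helps to keep in mind what these rows compute. ARGBToYRow and friends produce studio-swing (16..235) BT.601 luma; the scalar equivalent of the vectorized multiply-accumulate is roughly the following, where the coefficients are the conventional 8-bit fixed-point BT.601 weights rather than values quoted in this patch, so treat them as an assumption:

    #include "libyuv/basic_types.h"  // uint8

    // Fixed-point BT.601 luma: Y ~= 16 + 0.257*R + 0.504*G + 0.098*B.
    // 0x1080 folds together the +16 offset (16 << 8) and the +128 rounding term.
    static inline uint8 RGBToY(uint8 r, uint8 g, uint8 b) {
      return static_cast<uint8>((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
    }

The SSSE3 rows apply the same weights sixteen pixels at a time, using the constant coefficient and bias vectors loaded into xmm4/xmm5 at the top of each function.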
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -687,7 +687,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -738,7 +738,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -751,7 +751,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqu (%0),%%xmm0 \n" @@ -805,7 +805,7 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, } void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -841,7 +841,7 @@ void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { } void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -878,7 +878,7 @@ void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -891,7 +891,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -942,7 +942,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -955,7 +955,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqu (%0),%%xmm0 \n" @@ -1009,7 +1009,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, } void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -1045,7 +1045,7 @@ void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { } void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" "1: \n" @@ -1082,7 +1082,7 @@ void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -1095,7 +1095,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -1146,7 +1146,7 @@ void ABGRToUVRow_SSSE3(const uint8* 
src_abgr0, int src_stride_abgr, void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr, uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( + asm volatile( "movdqa %0,%%xmm4 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm5 \n" @@ -1159,7 +1159,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr, "xmm3", "xmm4", "xmm5" #endif ); - asm volatile ( + asm volatile( "sub %1,%2 \n" "1: \n" "movdqu (%0),%%xmm0 \n" @@ -1291,7 +1291,7 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1325,7 +1325,7 @@ void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1360,7 +1360,7 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1394,7 +1394,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "sub %1,%2 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pxor %%xmm4,%%xmm4 \n" @@ -1450,7 +1450,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, void YToARGBRow_SSE2(const uint8* y_buf, uint8* rgb_buf, int width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "pslld $0x18,%%xmm4 \n" "mov $0x10001000,%%eax \n" @@ -1501,7 +1501,7 @@ CONST uvec8 kShuffleMirror = { void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { intptr_t temp_width = static_cast(width); - asm volatile ( + asm volatile( "movdqa %3,%%xmm5 \n" "lea -0x10(%0),%0 \n" "1: \n" @@ -1526,7 +1526,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORROW_SSE2 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { intptr_t temp_width = static_cast(width); - asm volatile ( + asm volatile( "lea -0x10(%0),%0 \n" "1: \n" "movdqu (%0,%2),%%xmm0 \n" @@ -1561,7 +1561,7 @@ CONST uvec8 kShuffleMirrorUV = { void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { intptr_t temp_width = static_cast(width); - asm volatile ( + asm volatile( "movdqa %4,%%xmm1 \n" "lea -16(%0,%3,2),%0 \n" "sub %1,%2 \n" @@ -1589,7 +1589,7 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, #ifdef HAS_SPLITUV_SSE2 void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1625,7 +1625,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { #ifdef HAS_COPYROW_SSE2 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { - asm volatile ( + asm volatile( "sub %0,%1 \n" "1: \n" "movdqa (%0),%%xmm0 \n" @@ -1650,7 +1650,7 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { #ifdef HAS_COPYROW_X86 void CopyRow_X86(const uint8* src, uint8* dst, int width) { size_t width_tmp = static_cast(width); - asm volatile ( + asm volatile( "shr $0x2,%2 \n" "rep movsl \n" : "+S"(src), // %0 @@ -1664,7 +1664,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) { #ifdef HAS_YUY2TOYROW_SSE2 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ 
-1691,7 +1691,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1730,7 +1730,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1759,7 +1759,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1797,7 +1797,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, } void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "1: \n" "movdqa (%0),%%xmm0 \n" "movdqa 0x10(%0),%%xmm1 \n" @@ -1822,7 +1822,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1861,7 +1861,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" @@ -1886,7 +1886,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_y, int pix) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" @@ -1929,7 +1929,7 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, // Destination aligned to 16 bytes, multiple of 4 pixels void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0xf,%%xmm7 \n" "pcmpeqb %%xmm6,%%xmm6 \n" @@ -1999,7 +1999,7 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb, uint8* dst_argb, // Blend 1 pixel at a time, unaligned void ARGBBlendRow1_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0xf,%%xmm7 \n" "pcmpeqb %%xmm6,%%xmm6 \n" diff --git a/source/row_win.cc b/source/row_win.cc index 7007876a0..cdf4d2bcd 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "row.h" +#include "source/row.h" #if defined(_M_IX86) #include "emmintrin.h" diff --git a/source/scale.cc b/source/scale.cc index f3d6d771c..436ac3990 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -16,7 +16,7 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" // For CopyPlane -#include "row.h" +#include "source/row.h" #ifdef __cplusplus namespace libyuv { @@ -59,7 +59,7 @@ void SetUseReferenceImpl(bool use) { #define HAS_SCALEROWDOWN2_NEON void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst, int dst_width) { - asm volatile ( + asm volatile( "1: \n" "vld2.u8 {q0,q1}, [%0]! \n" // load even pixels into q0, odd into q1 "vst1.u8 {q0}, [%1]! 
\n" // store even pixels @@ -75,7 +75,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */, void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst, int dst_width) { - asm volatile ( + asm volatile( "add %1, %0 \n" // change the stride to row 2 pointer "1: \n" "vld1.u8 {q0,q1}, [%0]! \n" // load row 1 and post increment @@ -101,7 +101,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_NEON static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "1: \n" "vld2.u8 {d0, d1}, [%0]! \n" "vtrn.u8 d1, d0 \n" @@ -120,7 +120,7 @@ static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "add r4, %0, %3 \n" "add r5, r4, %3 \n" "add %3, r5, %3 \n" @@ -159,7 +159,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride, // Point samples 32 pixels to 24 pixels. static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "1: \n" "vld4.u8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vmov d2, d3 \n" // order needs to be d0, d1, d2 @@ -176,7 +176,7 @@ static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */, static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -231,7 +231,7 @@ static void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" @@ -283,7 +283,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = // 32 -> 12 static void ScaleRowDown38_NEON(const uint8* src_ptr, int, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vld1.u8 {q3}, [%3] \n" "1: \n" "vld1.u8 {d0, d1, d2, d3}, [%0]! 
\n" @@ -304,7 +304,7 @@ static void ScaleRowDown38_NEON(const uint8* src_ptr, int, // 32x3 -> 12x1 static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "vld1.u8 {q15}, [%6] \n" @@ -413,7 +413,7 @@ static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride, // 32x2 -> 12x1 static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "vld1.u16 {q13}, [%4] \n" "vld1.u8 {q14}, [%5] \n" "add %3, %0 \n" @@ -508,7 +508,7 @@ static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride, static void ScaleFilterRows_NEON(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { - asm volatile ( + asm volatile( "cmp %4, #0 \n" "beq 2f \n" "add %2, %1 \n" @@ -1555,7 +1555,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr, #define HAS_SCALEROWDOWN2_SSE2 static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1579,7 +1579,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "1: \n" @@ -1614,7 +1614,7 @@ static void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN4_SSE2 static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrld $0x18,%%xmm5 \n" "1: \n" @@ -1640,7 +1640,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { intptr_t temp = 0; - asm volatile ( + asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0x8,%%xmm7 \n" "lea (%4,%4,2),%3 \n" @@ -1693,7 +1693,7 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN8_SSE2 static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlq $0x38,%%xmm5 \n" "1: \n" @@ -1722,7 +1722,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride, uint16* dst_ptr, int src_width, int src_height) { int tmp_height = 0; intptr_t tmp_src = 0; - asm volatile ( + asm volatile( "pxor %%xmm4,%%xmm4 \n" "sub $0x1,%5 \n" "1: \n" @@ -2263,7 +2263,7 @@ extern "C" void ScaleFilterRows_SSSE3(uint8* dst_ptr, #elif defined(__x86_64__) static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "lea (%3,%3,2),%%r10 \n" "pxor %%xmm7,%%xmm7 \n" "1:" @@ -2322,7 +2322,7 @@ static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN34_SSSE3 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%3),%%xmm3 \n" "movdqa (%4),%%xmm4 \n" "movdqa (%5),%%xmm5 \n" @@ -2353,7 +2353,7 @@ static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int 
src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm2 \n" // _shuf01 "movdqa (%5),%%xmm3 \n" // _shuf11 "movdqa (%6),%%xmm4 \n" // _shuf21 @@ -2410,7 +2410,7 @@ static void ScaleRowDown34_1_Int_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm2 \n" // _shuf01 "movdqa (%5),%%xmm3 \n" // _shuf11 "movdqa (%6),%%xmm4 \n" // _shuf21 @@ -2471,7 +2471,7 @@ static void ScaleRowDown34_0_Int_SSSE3(const uint8* src_ptr, int src_stride, #define HAS_SCALEROWDOWN38_SSSE3 static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%3),%%xmm4 \n" "movdqa (%4),%%xmm5 \n" "1:" @@ -2498,7 +2498,7 @@ static void ScaleRowDown38_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm4 \n" "movdqa (%5),%%xmm5 \n" "movdqa (%6),%%xmm6 \n" @@ -2555,7 +2555,7 @@ static void ScaleRowDown38_3_Int_SSSE3(const uint8* src_ptr, int src_stride, static void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { - asm volatile ( + asm volatile( "movdqa (%4),%%xmm4 \n" "movdqa (%5),%%xmm5 \n" "movdqa (%6),%%xmm6 \n" @@ -2597,7 +2597,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { if (source_y_fraction == 0) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "lea 0x10(%1),%1 \n" @@ -2615,7 +2615,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, ); return; } else if (source_y_fraction == 128) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "movdqa (%1,%3,1),%%xmm2 \n" @@ -2635,7 +2635,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, ); return; } else { - asm volatile ( + asm volatile( "mov %3,%%eax \n" "movd %%eax,%%xmm6 \n" "punpcklwd %%xmm6,%%xmm6 \n" @@ -2688,7 +2688,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) { if (source_y_fraction <= 1) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "lea 0x10(%1),%1 \n" @@ -2706,7 +2706,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, ); return; } else if (source_y_fraction == 128) { - asm volatile ( + asm volatile( "1:" "movdqa (%1),%%xmm0 \n" "movdqa (%1,%3,1),%%xmm2 \n" @@ -2726,7 +2726,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, ); return; } else { - asm volatile ( + asm volatile( "mov %3,%%eax \n" "shr %%eax \n" "mov %%al,%%ah \n" diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc index 52730e249..4f341521f 100644 --- a/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc @@ -8,12 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "unit_test.h" - #include #include #include +#include "unit_test/unit_test.h" #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/cpu_id.h" @@ -153,7 +152,7 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) { _benchmark_width, _benchmark_height); c_time = (get_time() - c_time) / _benchmark_iterations; - printf ("BenchmarkPsnr_C - %8d us c\n", (int)(c_time*1e6)); + printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6); MaskCpuFlags(-1); @@ -176,7 +175,7 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) { _benchmark_width, _benchmark_height); opt_time = (get_time() - opt_time) / _benchmark_iterations; - printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6)); + printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6); EXPECT_EQ(0, 0); @@ -274,7 +273,7 @@ TEST_F(libyuvTest, BenchmarkSsim_C) { _benchmark_width, _benchmark_height); c_time = (get_time() - c_time) / _benchmark_iterations; - printf ("BenchmarkSsim_C - %8d us c\n", (int)(c_time*1e6)); + printf("BenchmarkSsim_C - %8.2f us c\n", c_time * 1e6); MaskCpuFlags(-1); @@ -297,7 +296,7 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) { _benchmark_width, _benchmark_height); opt_time = (get_time() - opt_time) / _benchmark_iterations; - printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6)); + printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6); EXPECT_EQ(0, 0);
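Looking back at the scale.cc hunks, the row scalers being reformatted implement fairly simple filters once the NEON/SSE2 packaging is stripped away. ScaleRowDown2Int, for example, is a 2x2 box filter: each output pixel is the rounded average of a 2x2 block spanning two adjacent source rows. A scalar sketch of that operation follows; the function name and the exact rounding constant are assumptions, since the real C reference lives in scale.cc outside the hunks shown:

    #include "libyuv/basic_types.h"  // uint8

    // Hypothetical scalar equivalent of ScaleRowDown2Int: average 2x2 blocks.
    static void ScaleRowDown2Box_C(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
      const uint8* s = src_ptr;
      const uint8* t = src_ptr + src_stride;  // second source row
      for (int x = 0; x < dst_width; ++x) {
        dst_ptr[x] = static_cast<uint8>((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
        s += 2;
        t += 2;
      }
    }

This matches the setup visible in ScaleRowDown2Int_NEON above, where the stride is added to the source pointer before the loop to form the second-row pointer.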
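Finally, the compare_test.cc hunks do slightly more than whitespace cleanup: the benchmark printouts stop truncating the elapsed time to a whole number of microseconds through an int cast and print it as a float with two decimals instead. In sketch form, with variable names following the test and get_time() being the harness's own timer:

    #include <stdio.h>

    double c_time = get_time();
    // ... run _benchmark_iterations iterations of the code under test ...
    c_time = (get_time() - c_time) / _benchmark_iterations;

    // Old form dropped sub-microsecond precision:
    //   printf("BenchmarkPsnr_C - %8d us c\n", (int)(c_time * 1e6));
    // New form keeps it:
    printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6);

Note that the label printed by BenchmarkSsim_OPT still reads "BenchmarkPsnr_OPT"; this patch keeps the original string and only changes the format specifier.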