From d28cd77f99e623ce2b9493f3b197408503f9ee88 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 19 Mar 2015 20:36:31 +0000 Subject: [PATCH] Enable assembly for clangcl build on Windows. Previously assembly was disabled so clangcl would work, but only with C code. As clangcl mimics both Visual C and GCC, ifdefs need to pick one or the other or often you'll end up with both. In this CL we disable most Visual C code and use the GCC versions which allow assembly for both 32 and 64 bit intel. BUG=412 TESTED=clang=1 build on windows R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/51389004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1341 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/planar_functions.h | 2 +- include/libyuv/row.h | 15 +++++++-------- include/libyuv/version.h | 2 +- source/compare.cc | 6 +++--- source/compare_win.cc | 5 +++-- source/cpu_id.cc | 17 ++++++++--------- source/planar_functions.cc | 4 ++-- source/rotate.cc | 5 +++-- source/row_common.cc | 3 ++- source/row_win.cc | 9 +++++---- source/scale_win.cc | 5 +++-- unit_test/convert_test.cc | 6 ------ unit_test/planar_test.cc | 6 ------ 14 files changed, 39 insertions(+), 48 deletions(-) diff --git a/README.chromium b/README.chromium index 08892be3e..1ac598798 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1337 +Version: 1341 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 7ca5e9820..270e76f3a 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -393,7 +393,7 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, #if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ defined(TARGET_IPHONE_SIMULATOR) || \ (defined(__i386__) && !defined(__SSE2__)) || \ - (defined(_MSC_VER) && defined(__clang__)) + 
((defined(_MSC_VER) && !defined(__clang__)) && defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif diff --git a/include/libyuv/row.h b/include/libyuv/row.h index be21e07fb..560aeca80 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -39,8 +39,7 @@ extern "C" { #if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ defined(TARGET_IPHONE_SIMULATOR) || \ - (defined(__i386__) && !defined(__SSE2__)) || \ - (defined(_MSC_VER) && defined(__clang__)) + (defined(__i386__) && !defined(__SSE2__)) #define LIBYUV_DISABLE_X86 #endif // True if compiling for SSSE3 as a requirement. @@ -158,7 +157,7 @@ extern "C" { #endif // The following are available on x64 Visual C: -#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) +#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && !defined(__clang__) #define HAS_I422TOARGBROW_SSSE3 #endif @@ -177,15 +176,16 @@ extern "C" { #endif // __clang__ // Visual C 2012 required for AVX2. -#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700 +#if defined(_M_IX86) && !defined(__clang__) && \ + defined(_MSC_VER) && _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 // The following are available require VS2012. Port to GCC. #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) // TODO(fbarchard): fix AVX2 versions of YUV conversion. 
bug=393 -#define HAS_I422TOABGRROW_AVX2 #define HAS_I422TOARGBROW_AVX2 +#define HAS_I422TOABGRROW_AVX2 #define HAS_I422TOBGRAROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_NV12TOARGBROW_AVX2 @@ -382,7 +382,7 @@ extern "C" { #endif #endif -#if defined(_MSC_VER) && !defined(__CLR_VER) +#if defined(_MSC_VER) && !defined(__clang__) && !defined(__CLR_VER) #define SIMD_ALIGNED(var) __declspec(align(16)) var #define SIMD_ALIGNED32(var) __declspec(align(64)) var typedef __declspec(align(16)) int16 vec16[8]; @@ -397,8 +397,7 @@ typedef __declspec(align(32)) int8 lvec8[32]; typedef __declspec(align(32)) uint16 ulvec16[16]; typedef __declspec(align(32)) uint32 ulvec32[8]; typedef __declspec(align(32)) uint8 ulvec8[32]; - -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #define SIMD_ALIGNED32(var) var __attribute__((aligned(64))) diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 7ce1c4134..fdffe3cc8 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1337 +#define LIBYUV_VERSION 1341 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/compare.cc b/source/compare.cc index f84a08ee6..46aa8473d 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -37,7 +37,7 @@ uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); #define HAS_HASHDJB2_SSE41 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); -#if _MSC_VER >= 1700 +#ifdef VISUALC_HAS_AVX2 #define HAS_HASHDJB2_AVX2 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); #endif @@ -138,8 +138,8 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); #define HAS_SUMSQUAREERROR_SSE2 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int 
count); #endif -// Visual C 2012 required for AVX2. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700 + +#ifdef VISUALC_HAS_AVX2 #define HAS_SUMSQUAREERROR_AVX2 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); #endif diff --git a/source/compare_win.cc b/source/compare_win.cc index e99009a21..0395e6565 100644 --- a/source/compare_win.cc +++ b/source/compare_win.cc @@ -16,7 +16,8 @@ namespace libyuv { extern "C" { #endif -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ + defined(_MSC_VER) && !defined(__clang__) __declspec(naked) __declspec(align(16)) uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { @@ -220,7 +221,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { } #endif // _MSC_VER >= 1700 -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) +#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) ... #ifdef __cplusplus } // extern "C" diff --git a/source/cpu_id.cc b/source/cpu_id.cc index d4fc7cd48..8a10b0083 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -10,13 +10,12 @@ #include "libyuv/cpu_id.h" -#if defined(_MSC_VER) && !defined(__clang__) +#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) #include <intrin.h> // For __cpuidex() #endif #if !defined(__pnacl__) && !defined(__CLR_VER) && \ - !defined(__native_client__) && \ - defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) && \ - (defined(_M_IX86) || defined(_M_X64)) + !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \ + defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219) #include <immintrin.h> // For _xgetbv() #endif @@ -37,7 +36,7 @@ extern "C" { // For functions that use the stack and have runtime checks for overflow, // use SAFEBUFFERS to avoid additional check. 
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) +#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) #define SAFEBUFFERS __declspec(safebuffers) #else #define SAFEBUFFERS @@ -49,7 +48,7 @@ extern "C" { !defined(__pnacl__) && !defined(__CLR_VER) LIBYUV_API void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { -#if defined(_MSC_VER) && !defined(__clang__) +#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) // Visual C version uses intrinsic or inline x86 assembly. #if (_MSC_FULL_VER >= 160040219) __cpuidex((int*)(cpu_info), info_eax, info_ecx); @@ -72,7 +71,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { } #endif // GCC version uses inline x86 assembly. -#else // defined(_MSC_VER) && !defined(__clang__) +#else // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) uint32 info_ebx, info_edx; asm volatile ( // NOLINT #if defined( __i386__) && defined(__PIC__) @@ -90,7 +89,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { cpu_info[1] = info_ebx; cpu_info[2] = info_ecx; cpu_info[3] = info_edx; -#endif // defined(_MSC_VER) && !defined(__clang__) +#endif // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) } #else // (defined(_M_IX86) || defined(_M_X64) ... LIBYUV_API @@ -107,7 +106,7 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. int TestOsSaveYmm() { uint32 xcr0 = 0u; -#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) +#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. 
#elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) __asm { diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 75ef775dd..3c7f761a4 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -677,12 +677,12 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } -#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER) +#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__)) if (TestCpuFlag(kCpuHasSSE2)) { ARGBAddRow = ARGBAddRow_SSE2; } #endif -#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER) +#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__)) if (TestCpuFlag(kCpuHasSSE2)) { ARGBAddRow = ARGBAddRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { diff --git a/source/rotate.cc b/source/rotate.cc index 5acaccfd8..bfab546a3 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -20,6 +20,7 @@ namespace libyuv { extern "C" { #endif +// TODO(fbarchard): switch to standard form of inline; fails on clangcl. 
#if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #if defined(__APPLE__) && defined(__i386__) @@ -70,7 +71,7 @@ void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, #endif // defined(__mips__) #if !defined(LIBYUV_DISABLE_X86) && \ - defined(_M_IX86) && defined(_MSC_VER) + defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) #define HAS_TRANSPOSE_WX8_SSSE3 __declspec(naked) __declspec(align(16)) static void TransposeWx8_SSSE3(const uint8* src, int src_stride, @@ -381,7 +382,7 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride, ); } -#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) +#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__) #define HAS_TRANSPOSE_UVWX8_SSE2 void TransposeUVWx8_SSE2(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, diff --git a/source/row_common.cc b/source/row_common.cc index 669dc66f4..c1266d449 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2175,7 +2175,8 @@ void I422ToUYVYRow_C(const uint8* src_y, // Maximum temporary width for wrappers to process at a time, in pixels. #define MAXTWIDTH 2048 -#if !defined(_MSC_VER) && defined(HAS_I422TORGB565ROW_SSSE3) +#if !(defined(_MSC_VER) && !defined(__clang__)) && \ + defined(HAS_I422TORGB565ROW_SSSE3) // row_win.cc has asm version, but GCC uses 2 step wrapper. void I422ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_u, diff --git a/source/row_win.cc b/source/row_win.cc index 4676aaec4..85c4dd20f 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -10,7 +10,8 @@ #include "libyuv/row.h" -#if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64) && \ + defined(_MSC_VER) && !defined(__clang__) #include <emmintrin.h> #include <tmmintrin.h> // For _mm_maddubs_epi16 #endif @@ -21,8 +22,8 @@ extern "C" { #endif // This module is for Visual C. 
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ - (defined(_M_IX86) || defined(_M_X64)) +#if !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) && \ + defined(_MSC_VER) && !defined(__clang__) struct YuvConstants { lvec8 kUVToB; // 0 @@ -6273,7 +6274,7 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 #endif // defined(_M_X64) -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) +#endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) ... #ifdef __cplusplus } // extern "C" diff --git a/source/scale_win.cc b/source/scale_win.cc index e0209cdec..77529ffe5 100644 --- a/source/scale_win.cc +++ b/source/scale_win.cc @@ -16,7 +16,8 @@ extern "C" { #endif // This module is for Visual C x86. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ + defined(_MSC_VER) && !defined(__clang__) // Offsets for source bytes 0 to 9 static uvec8 kShuf0 = @@ -1170,7 +1171,7 @@ int FixedDiv1_X86(int num, int div) { } } -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) +#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && ... 
#ifdef __cplusplus } // extern "C" diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 6d768d48a..506ad39a6 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -26,12 +26,6 @@ #include "libyuv/video_common.h" #include "../unit_test/unit_test.h" -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else // __GNUC__ -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif - namespace libyuv { #define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index e1822f91a..33d40df78 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -22,12 +22,6 @@ #include "libyuv/row.h" // For Sobel #include "../unit_test/unit_test.h" -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else // __GNUC__ -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif - namespace libyuv { TEST_F(libyuvTest, TestAttenuate) {