From 735ace2ed3389d590e35060dfb17b331568f78fd Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 8 Nov 2017 15:41:07 -0800 Subject: [PATCH] Re-enable x86 assembly without requiring -msse2 clang does not require -msse2 or -msse for inline, except the "x" parameter. So change this to "m" for 32 bit. 64 bit requires sse2 so use "x" for 64 bit. gcc requires -msse for xmm registers in clobber list. Reduce compiler requirement from -msse2 to -msse for enabling assembly. Bug: libyuv:754, libyuv:757 Test: CC=clang CXX=clang++ CFLAGS="-m32" CXXFLAGS="-m32 -mno-sse -O2" make -f linux.mk Change-Id: I86df72cfee80b7d349561c1fd7c97ad360767255 Reviewed-on: https://chromium-review.googlesource.com/759303 Reviewed-by: richard winterton Reviewed-by: Frank Barchard Commit-Queue: Frank Barchard --- include/libyuv/compare_row.h | 2 +- include/libyuv/planar_functions.h | 2 +- include/libyuv/rotate_row.h | 2 +- include/libyuv/row.h | 4 ++-- include/libyuv/scale_row.h | 2 +- linux.mk | 2 +- source/row_gcc.cc | 18 ++++++++++++++++-- 7 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h index 76ed8ab8a..2e5ebe508 100644 --- a/include/libyuv/compare_row.h +++ b/include/libyuv/compare_row.h @@ -19,7 +19,7 @@ extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index d97965cb8..c91501a9c 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -746,7 +746,7 @@ int I420Interpolate(const uint8* src0_y, int interpolation); #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 60ac55ef9..973fc1528 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -19,7 +19,7 @@ extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 9a9b9580b..7332bdf79 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -31,7 +31,7 @@ extern "C" { var = 0 #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 @@ -279,7 +279,7 @@ extern "C" { (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) #define HAS_MERGEUV10ROW_AVX2 #endif - + // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 534898cdd..c4a66aa07 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -20,7 +20,7 @@ extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 diff --git a/linux.mk b/linux.mk index 1dd527c75..7e9aa5e4e 100644 --- a/linux.mk +++ b/linux.mk @@ -80,4 +80,4 @@ cpuid: util/cpuid.c libyuv.a $(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a clean: - /bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr + /bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 3af320454..c2874e159 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -5687,6 +5687,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb, #ifdef HAS_HALFFLOATROW_SSE2 static float kScaleBias = 1.9259299444e-34f; void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { + scale *= kScaleBias; asm volatile ( "pshufd $0x0,%3,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" @@ -5713,7 +5714,11 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 - : "x"(scale * kScaleBias) // %3 +#if defined(__x86_64__) + : "x"(scale) // %3 +#else + : "m"(scale) // %3 +#endif : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5" ); @@ -5722,6 +5727,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { #ifdef HAS_HALFFLOATROW_AVX2 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { + scale *= kScaleBias; asm volatile ( "vbroadcastss %3, %%ymm4 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n" @@ -5749,7 +5755,11 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 - : "x"(scale * kScaleBias) // %3 +#if defined(__x86_64__) + : "x"(scale) // %3 +#else + : "m"(scale) // %3 +#endif : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5" ); @@ -5782,7 +5792,11 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 +#if defined(__x86_64__) : "x"(scale) // %3 +#else + : "m"(scale) // %3 +#endif : "memory", "cc", "xmm2", "xmm3", "xmm4" );