mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Re-enable x86 assembly without requiring -msse2
clang does not require -msse2 or -msse for inline assembly, except for operands that use the "x" (SSE register) constraint. So change that constraint to "m" (memory operand) for 32-bit builds. 64-bit builds always have SSE2, so keep "x" for 64-bit. gcc requires -msse when xmm registers appear in the clobber list, so reduce the compiler requirement for enabling assembly from -msse2 to -msse. Bug: libyuv:754, libyuv:757 Test: CC=clang CXX=clang++ CFLAGS="-m32" CXXFLAGS="-m32 -mno-sse -O2" make -f linux.mk Change-Id: I86df72cfee80b7d349561c1fd7c97ad360767255 Reviewed-on: https://chromium-review.googlesource.com/759303 Reviewed-by: richard winterton <rrwinterton@gmail.com> Reviewed-by: Frank Barchard <fbarchard@google.com> Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
parent
68f852d835
commit
735ace2ed3
@ -19,7 +19,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
|
||||
@ -746,7 +746,7 @@ int I420Interpolate(const uint8* src0_y,
|
||||
int interpolation);
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
|
||||
@ -19,7 +19,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
|
||||
@ -31,7 +31,7 @@ extern "C" {
|
||||
var = 0
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
@ -279,7 +279,7 @@ extern "C" {
|
||||
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
#define HAS_MERGEUV10ROW_AVX2
|
||||
#endif
|
||||
|
||||
|
||||
// The following are available on Neon platforms:
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
|
||||
@ -20,7 +20,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
|
||||
2
linux.mk
2
linux.mk
@ -80,4 +80,4 @@ cpuid: util/cpuid.c libyuv.a
|
||||
$(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a
|
||||
|
||||
clean:
|
||||
/bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr
|
||||
/bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr
|
||||
|
||||
@ -5687,6 +5687,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
|
||||
#ifdef HAS_HALFFLOATROW_SSE2
|
||||
static float kScaleBias = 1.9259299444e-34f;
|
||||
void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
scale *= kScaleBias;
|
||||
asm volatile (
|
||||
"pshufd $0x0,%3,%%xmm4 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
@ -5713,7 +5714,11 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "x"(scale * kScaleBias) // %3
|
||||
#if defined(__x86_64__)
|
||||
: "x"(scale) // %3
|
||||
#else
|
||||
: "m"(scale) // %3
|
||||
#endif
|
||||
: "memory", "cc",
|
||||
"xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
@ -5722,6 +5727,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
|
||||
#ifdef HAS_HALFFLOATROW_AVX2
|
||||
void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
scale *= kScaleBias;
|
||||
asm volatile (
|
||||
"vbroadcastss %3, %%ymm4 \n"
|
||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -5749,7 +5755,11 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "x"(scale * kScaleBias) // %3
|
||||
#if defined(__x86_64__)
|
||||
: "x"(scale) // %3
|
||||
#else
|
||||
: "m"(scale) // %3
|
||||
#endif
|
||||
: "memory", "cc",
|
||||
"xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
@ -5782,7 +5792,11 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
#if defined(__x86_64__)
|
||||
: "x"(scale) // %3
|
||||
#else
|
||||
: "m"(scale) // %3
|
||||
#endif
|
||||
: "memory", "cc",
|
||||
"xmm2", "xmm3", "xmm4"
|
||||
);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user