From a2142148e9875eb35871789b8d52bbeffc4b30a8 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 9 Jan 2018 16:39:24 -0800 Subject: [PATCH] Remove x64 native_client macros. Bug: libyuv:702 Test: try bots pass Change-Id: I76d74b5f02fe9843418108b84742e2f714d1ab0a Reviewed-on: https://chromium-review.googlesource.com/855656 Reviewed-by: Frank Barchard --- include/libyuv/compare_row.h | 7 +++- include/libyuv/planar_functions.h | 35 +++++++++------- include/libyuv/rotate_row.h | 19 +++++---- include/libyuv/row.h | 70 ++++++++----------------------- include/libyuv/scale_row.h | 9 ++-- source/rotate_argb.cc | 8 ++-- source/row_gcc.cc | 24 +++++------ 7 files changed, 73 insertions(+), 99 deletions(-) diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h index 2e5ebe508..0cde1d42c 100644 --- a/include/libyuv/compare_row.h +++ b/include/libyuv/compare_row.h @@ -18,17 +18,20 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // Visual C 2012 required for AVX2. 
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ _MSC_VER >= 1700 diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 653b06197..05c9f996d 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -22,6 +22,24 @@ namespace libyuv { extern "C" { #endif +// TODO(fbarchard): Move cpu macros to row.h +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) +#define LIBYUV_DISABLE_X86 +#endif +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 +#endif +#endif +// The following are available on all x86 platforms: +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) +#define HAS_ARGBAFFINEROW_SSE2 +#endif + // Copy a plane of data. LIBYUV_API void CopyPlane(const uint8* src_y, @@ -763,22 +781,6 @@ int I420Interpolate(const uint8* src0_y, int height, int interpolation); -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) -#define LIBYUV_DISABLE_X86 -#endif -// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#define LIBYUV_DISABLE_X86 -#endif -#endif -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_ARGBAFFINEROW_SSE2 -#endif - // Row function for copying pixels from a source with a slope to a row // of destination. Useful for scaling, rotation, mirror, texture mapping. 
LIBYUV_API @@ -787,6 +789,7 @@ void ARGBAffineRow_C(const uint8* src_argb, uint8* dst_argb, const float* uv_dudv, int width); +// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 7e9dfd2cf..5d1d887e3 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -18,10 +18,14 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) @@ -34,21 +38,18 @@ extern "C" { #define HAS_TRANSPOSEUVWX8_SSE2 #endif -// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__i386__) || \ - (defined(__x86_64__) && !defined(__native_client__))) +// The following are available for GCC 32 or 64 bit: +#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #define HAS_TRANSPOSEWX8_SSSE3 #endif -// The following are available for 64 bit GCC but not NaCL: -#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ - defined(__x86_64__) +// The following are available for 64 bit GCC: +#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) #define HAS_TRANSPOSEWX8_FAST_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON diff --git 
a/include/libyuv/row.h b/include/libyuv/row.h index a93658f92..e272a8faf 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -20,29 +20,20 @@ namespace libyuv { extern "C" { #endif -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) - -#define align_buffer_64(var, size) \ - uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ - uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ - -#define free_aligned_buffer_64(var) \ - free(var##_mem); \ - var = 0 - -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) -// define LIBYUV_DISABLE_X86 +#define LIBYUV_DISABLE_X86 #endif #endif -#if defined(__native_client__) -#define LIBYUV_DISABLE_NEON -#endif // clang >= 3.5.0 required for Arm64. 
#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON) #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5)) @@ -561,6 +552,16 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 +#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) + +#define align_buffer_64(var, size) \ + uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ + uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ + +#define free_aligned_buffer_64(var) \ + free(var##_mem); \ + var = 0 + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -573,48 +574,14 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 #else #define LABELALIGN #endif -#if defined(__native_client__) && defined(__x86_64__) -// r14 is used for MEMOP macros. 
-#define BUNDLELOCK ".bundle_lock\n" -#define BUNDLEUNLOCK ".bundle_unlock\n" -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#define MEMLEA3(offset, index, scale) #offset "(,%q" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%q" #base ",%q" #index "," #scale ")" -#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" -#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " (%%r15,%%r14),%%" #reg "\n" BUNDLEUNLOCK -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " %%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " (%%r15,%%r14),%" #arg "\n" BUNDLEUNLOCK -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #opcode \ - " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" BUNDLEUNLOCK -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" #op \ - " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" BUNDLEUNLOCK -#else // defined(__native_client__) && defined(__x86_64__) -#define BUNDLEALIGN + +// Memory-operand macros formerly needed for NaCl x64 with GCC - deprecated.
#define MEMACCESS(base) "(%" #base ")" #define MEMACCESS2(offset, base) #offset "(%" #base ")" #define MEMLEA(offset, base) #offset "(%" #base ")" #define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")" #define MEMLEA4(offset, base, index, scale) \ #offset "(%" #base ",%" #index "," #scale ")" -#define MEMMOVESTRING(s, d) -#define MEMSTORESTRING(reg, d) #define MEMOPREG(opcode, offset, base, index, scale, reg) \ #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" #define MEMOPMEM(opcode, reg, offset, base, index, scale) \ @@ -626,7 +593,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 ",%%" #reg2 "\n" #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ #op " $" #sel ",%%" #reg "," #offset "(%" #base ",%" #index "," #scale ")\n" -#endif // defined(__native_client__) && defined(__x86_64__) // Intel Code Analizer markers. Insert IACA_START IACA_END around code to be // measured and then run with iaca -64 libyuv_unittest. diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 3db46d399..7a10376ef 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -19,17 +19,20 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // GCC >= 4.7.0 required for AVX2. 
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) @@ -81,7 +84,7 @@ extern "C" { #endif // The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEARGBCOLS_NEON #define HAS_SCALEARGBROWDOWN2_NEON diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index f6a2bf69f..dc4ed6f57 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -22,10 +22,8 @@ extern "C" { // ARGBScale has a function to copy pixels to a row, striding each source // pixel by a constant. -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || \ - (defined(__x86_64__) && !defined(__native_client__)) || \ - defined(__i386__)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_SCALEARGBROWDOWNEVEN_SSE2 void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, @@ -33,7 +31,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, uint8* dst_ptr, int dst_width); #endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEARGBROWDOWNEVEN_NEON void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 1d486ee36..59a53e866 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1997,7 +1997,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. 
+ : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2025,7 +2025,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2053,7 +2053,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2471,7 +2471,7 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2505,7 +2505,7 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2538,7 +2538,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. 
+ : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -2571,7 +2571,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on @@ -3449,7 +3449,7 @@ void CopyRow_AVX(const uint8* src, uint8* dst, int count) { // Multiple of 1. void CopyRow_ERMS(const uint8* src, uint8* dst, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep movsb " MEMMOVESTRING(0, 1) " \n" + asm volatile("rep movsb \n" : "+S"(src), // %0 "+D"(dst), // %1 "+c"(width_tmp) // %2 @@ -3668,7 +3668,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { void SetRow_X86(uint8* dst, uint8 v8, int width) { size_t width_tmp = (size_t)(width >> 2); const uint32 v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. - asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n" + asm volatile("rep stosl \n" : "+D"(dst), // %0 "+c"(width_tmp) // %1 : "a"(v32) // %2 @@ -3677,7 +3677,7 @@ void SetRow_X86(uint8* dst, uint8 v8, int width) { void SetRow_ERMS(uint8* dst, uint8 v8, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep stosb " MEMSTORESTRING(al, 0) " \n" + asm volatile("rep stosb \n" : "+D"(dst), // %0 "+c"(width_tmp) // %1 : "a"(v8) // %2 @@ -3686,7 +3686,7 @@ void SetRow_ERMS(uint8* dst, uint8 v8, int width) { void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep stosl " MEMSTORESTRING(eax, 0) " \n" + asm volatile("rep stosl \n" : "+D"(dst_argb), // %0 "+c"(width_tmp) // %1 : "a"(v32) // %2 @@ -5707,7 +5707,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, // Blend 100 / 0 - Copy row unchanged. 
LABELALIGN "100: \n" - "rep movsb " MEMMOVESTRING(1,0) " \n" + "rep movsb \n" "jmp 999f \n" "99: \n"