From a37e6bc81b52d39cdcfd0f1428f5d6c2b2bc9861 Mon Sep 17 00:00:00 2001 From: George Steed Date: Mon, 19 Aug 2024 10:10:21 +0100 Subject: [PATCH] [AArch64] Re-enable SME only for Linux and new versions of Clang This was previously disabled in 679e851f653866a49e21f69fe8380bd20123f0ee, so re-enable it but only for Linux where SME is known to work correctly. Change-Id: I2626b03f3854b27162df1b55fc6767e02ffe318d Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5802958 Reviewed-by: Frank Barchard Reviewed-by: Justin Green --- include/libyuv/cpu_support.h | 8 +++++--- include/libyuv/rotate_row.h | 3 ++- source/rotate_sme.cc | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/libyuv/cpu_support.h b/include/libyuv/cpu_support.h index ac16575a1..0d67c3465 100644 --- a/include/libyuv/cpu_support.h +++ b/include/libyuv/cpu_support.h @@ -72,9 +72,11 @@ extern "C" { #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 -// Temporary disable SME. -#if !defined(LIBYUV_DISABLE_SME) -#define LIBYUV_DISABLE_SME +// Clang 19 required for SME due to needing __arm_tpidr2_save from compiler-rt, +// only enabled on Linux for now. +#if !defined(LIBYUV_DISABLE_SME) && defined(__aarch64__) && \ + defined(__gnu_linux__) && defined(__clang__) && (__clang_major__ >= 19) +#define CLANG_HAS_SME 1 #endif #ifdef __cplusplus diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 7672e2c1f..bbf217f0a 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -50,7 +50,8 @@ extern "C" { #define HAS_TRANSPOSE4X4_32_NEON #endif -#if !defined(LIBYUV_DISABLE_SME) && defined(__aarch64__) +#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \ + defined(__aarch64__) #define HAS_TRANSPOSEWXH_SME #define HAS_TRANSPOSEUVWXH_SME #endif diff --git a/source/rotate_sme.cc b/source/rotate_sme.cc index 70e2a0d40..dc9ab0c3a 100644 --- a/source/rotate_sme.cc +++ b/source/rotate_sme.cc @@ -18,7 +18,8 @@ namespace libyuv { extern "C" { #endif -#if !defined(LIBYUV_DISABLE_SME) && defined(__aarch64__) +#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \ + defined(__aarch64__) __arm_locally_streaming __arm_new("za") void TransposeWxH_SME( const uint8_t* src, @@ -164,7 +165,8 @@ __arm_locally_streaming __arm_new("za") void TransposeUVWxH_SME( } while (height > 0); } -#endif // !defined(LIBYUV_DISABLE_SME) && defined(__aarch64__) +#endif // !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && + // defined(__aarch64__) #ifdef __cplusplus } // extern "C"