From f145aa26da5c660d1e6376e2794eba444ae2eda5 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Thu, 27 Mar 2025 04:05:15 -0700 Subject: [PATCH] Add SME2 detect Bug: None Change-Id: I36e576de1cf468049faaf3923b6c21fc9ad14271 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6401373 Reviewed-by: George Steed --- include/libyuv/cpu_id.h | 1 + source/cpu_id.cc | 19 +++++++++++++------ unit_test/cpu_test.cc | 4 ++++ util/cpuid.c | 2 ++ 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 44ea8c07c..8ce319e05 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -29,6 +29,7 @@ static const int kCpuHasNeonI8MM = 0x400; static const int kCpuHasSVE = 0x800; static const int kCpuHasSVE2 = 0x1000; static const int kCpuHasSME = 0x2000; +static const int kCpuHasSME2 = 0x4000; // These flags are only valid on RISCV processors. static const int kCpuHasRISCV = 0x4; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index e4acbecf4..daa8e2b95 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -182,11 +182,12 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { #ifdef __linux__ // Define hwcap values ourselves: building with an old auxv header where these // hwcap values are not defined should not prevent features from being enabled. -#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20) -#define YUV_AARCH64_HWCAP_SVE (1 << 22) -#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1) -#define YUV_AARCH64_HWCAP2_I8MM (1 << 13) -#define YUV_AARCH64_HWCAP2_SME (1 << 23) +#define YUV_AARCH64_HWCAP_ASIMDDP (1UL << 20) +#define YUV_AARCH64_HWCAP_SVE (1UL << 22) +#define YUV_AARCH64_HWCAP2_SVE2 (1UL << 1) +#define YUV_AARCH64_HWCAP2_I8MM (1UL << 13) +#define YUV_AARCH64_HWCAP2_SME (1UL << 23) +#define YUV_AARCH64_HWCAP2_SME2 (1UL << 37) // For AArch64, but public to allow testing on any CPU. LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, @@ -210,6 +211,9 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, features |= kCpuHasSVE2; if (hwcap2 & YUV_AARCH64_HWCAP2_SME) { features |= kCpuHasSME; + if (hwcap2 & YUV_AARCH64_HWCAP2_SME2) { + features |= kCpuHasSME2; + } } } } @@ -256,8 +260,11 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { features |= kCpuHasNeonDotProd; if (have_feature("hw.optional.arm.FEAT_I8MM")) { features |= kCpuHasNeonI8MM; - if (have_feature("hw.optional.arm.FEAT_SME2")) { + if (have_feature("hw.optional.arm.FEAT_SME")) { features |= kCpuHasSME; + if (have_feature("hw.optional.arm.FEAT_SME2")) { + features |= kCpuHasSME2; + } } } } diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index ebae29264..54a782e18 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -101,6 +101,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_sve = TestCpuFlag(kCpuHasSVE); int has_sve2 = TestCpuFlag(kCpuHasSVE2); int has_sme = TestCpuFlag(kCpuHasSME); + int has_sme2 = TestCpuFlag(kCpuHasSME2); printf("Has Arm 0x%x\n", has_arm); printf("Has Neon 0x%x\n", has_neon); printf("Has Neon DotProd 0x%x\n", has_neon_dotprod); @@ -108,6 +109,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has SVE 0x%x\n", has_sve); printf("Has SVE2 0x%x\n", has_sve2); printf("Has SME 0x%x\n", has_sme); + printf("Has SME2 0x%x\n", has_sme2); #if defined(__aarch64__) // Read and print the SVE and SME vector lengths. @@ -381,6 +383,8 @@ TEST_F(LibYUVBaseTest, TestLinuxAArch64) { // Check for SME feature detection. expected |= kCpuHasSME; EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU)); + + // TODO: Check for SME2 feature detection from Apple M4 } #endif diff --git a/util/cpuid.c b/util/cpuid.c index 0f54b4222..de5ff9c96 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -61,6 +61,7 @@ int main(int argc, const char* argv[]) { int has_sve = TestCpuFlag(kCpuHasSVE); int has_sve2 = TestCpuFlag(kCpuHasSVE2); int has_sme = TestCpuFlag(kCpuHasSME); + int has_sme2 = TestCpuFlag(kCpuHasSME2); printf("Has Arm 0x%x\n", has_arm); printf("Has Neon 0x%x\n", has_neon); printf("Has Neon DotProd 0x%x\n", has_neon_dotprod); @@ -68,6 +69,7 @@ int main(int argc, const char* argv[]) { printf("Has SVE 0x%x\n", has_sve); printf("Has SVE2 0x%x\n", has_sve2); printf("Has SME 0x%x\n", has_sme); + printf("Has SME2 0x%x\n", has_sme2); #if __aarch64__ // Read and print the SVE and SME vector lengths.