diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 44ea8c07c..8ce319e05 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -29,6 +29,7 @@ static const int kCpuHasNeonI8MM = 0x400; static const int kCpuHasSVE = 0x800; static const int kCpuHasSVE2 = 0x1000; static const int kCpuHasSME = 0x2000; +static const int kCpuHasSME2 = 0x4000; // These flags are only valid on RISCV processors. static const int kCpuHasRISCV = 0x4; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index e4acbecf4..daa8e2b95 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -182,11 +182,12 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { #ifdef __linux__ // Define hwcap values ourselves: building with an old auxv header where these // hwcap values are not defined should not prevent features from being enabled. -#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20) -#define YUV_AARCH64_HWCAP_SVE (1 << 22) -#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1) -#define YUV_AARCH64_HWCAP2_I8MM (1 << 13) -#define YUV_AARCH64_HWCAP2_SME (1 << 23) +#define YUV_AARCH64_HWCAP_ASIMDDP (1UL << 20) +#define YUV_AARCH64_HWCAP_SVE (1UL << 22) +#define YUV_AARCH64_HWCAP2_SVE2 (1UL << 1) +#define YUV_AARCH64_HWCAP2_I8MM (1UL << 13) +#define YUV_AARCH64_HWCAP2_SME (1UL << 23) +#define YUV_AARCH64_HWCAP2_SME2 (1UL << 37) // For AArch64, but public to allow testing on any CPU. LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, @@ -210,6 +211,9 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, features |= kCpuHasSVE2; if (hwcap2 & YUV_AARCH64_HWCAP2_SME) { features |= kCpuHasSME; + if (hwcap2 & YUV_AARCH64_HWCAP2_SME2) { + features |= kCpuHasSME2; + } } } } @@ -256,8 +260,11 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { features |= kCpuHasNeonDotProd; if (have_feature("hw.optional.arm.FEAT_I8MM")) { features |= kCpuHasNeonI8MM; - if (have_feature("hw.optional.arm.FEAT_SME2")) { + if (have_feature("hw.optional.arm.FEAT_SME")) { features |= kCpuHasSME; + if (have_feature("hw.optional.arm.FEAT_SME2")) { + features |= kCpuHasSME2; + } } } } diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index ebae29264..54a782e18 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -101,6 +101,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_sve = TestCpuFlag(kCpuHasSVE); int has_sve2 = TestCpuFlag(kCpuHasSVE2); int has_sme = TestCpuFlag(kCpuHasSME); + int has_sme2 = TestCpuFlag(kCpuHasSME2); printf("Has Arm 0x%x\n", has_arm); printf("Has Neon 0x%x\n", has_neon); printf("Has Neon DotProd 0x%x\n", has_neon_dotprod); @@ -108,6 +109,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has SVE 0x%x\n", has_sve); printf("Has SVE2 0x%x\n", has_sve2); printf("Has SME 0x%x\n", has_sme); + printf("Has SME2 0x%x\n", has_sme2); #if defined(__aarch64__) // Read and print the SVE and SME vector lengths. @@ -381,6 +383,8 @@ TEST_F(LibYUVBaseTest, TestLinuxAArch64) { // Check for SME feature detection. expected |= kCpuHasSME; EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU)); + + // TODO: Check for SME2 feature detection from Apple M4 } #endif diff --git a/util/cpuid.c b/util/cpuid.c index 0f54b4222..de5ff9c96 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -61,6 +61,7 @@ int main(int argc, const char* argv[]) { int has_sve = TestCpuFlag(kCpuHasSVE); int has_sve2 = TestCpuFlag(kCpuHasSVE2); int has_sme = TestCpuFlag(kCpuHasSME); + int has_sme2 = TestCpuFlag(kCpuHasSME2); printf("Has Arm 0x%x\n", has_arm); printf("Has Neon 0x%x\n", has_neon); printf("Has Neon DotProd 0x%x\n", has_neon_dotprod); @@ -68,6 +69,7 @@ int main(int argc, const char* argv[]) { printf("Has SVE 0x%x\n", has_sve); printf("Has SVE2 0x%x\n", has_sve2); printf("Has SME 0x%x\n", has_sme); + printf("Has SME2 0x%x\n", has_sme2); #if __aarch64__ // Read and print the SVE and SME vector lengths.