Add SME2 detection

Bug: None
Change-Id: I36e576de1cf468049faaf3923b6c21fc9ad14271
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6401373
Reviewed-by: George Steed <george.steed@arm.com>
This commit is contained in:
Frank Barchard 2025-03-27 04:05:15 -07:00
parent 64ac2d8f0f
commit f145aa26da
4 changed files with 20 additions and 6 deletions

View File

@ -29,6 +29,7 @@ static const int kCpuHasNeonI8MM = 0x400;
static const int kCpuHasSVE = 0x800;
static const int kCpuHasSVE2 = 0x1000;
static const int kCpuHasSME = 0x2000;
static const int kCpuHasSME2 = 0x4000;
// These flags are only valid on RISCV processors.
static const int kCpuHasRISCV = 0x4;

View File

@ -182,11 +182,12 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
#ifdef __linux__
// Define hwcap values ourselves: building with an old auxv header where these
// hwcap values are not defined should not prevent features from being enabled.
#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20)
#define YUV_AARCH64_HWCAP_SVE (1 << 22)
#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1)
#define YUV_AARCH64_HWCAP2_I8MM (1 << 13)
#define YUV_AARCH64_HWCAP2_SME (1 << 23)
// Bit masks tested against the hwcap/hwcap2 words passed to AArch64CpuCaps.
// The constants are unsigned so the masks can never be negative values.
#define YUV_AARCH64_HWCAP_ASIMDDP (1UL << 20)
#define YUV_AARCH64_HWCAP_SVE (1UL << 22)
#define YUV_AARCH64_HWCAP2_SVE2 (1UL << 1)
#define YUV_AARCH64_HWCAP2_I8MM (1UL << 13)
#define YUV_AARCH64_HWCAP2_SME (1UL << 23)
// Bit 37 does not fit in a 32-bit unsigned long. This code is only guarded by
// __linux__, so use a 64-bit constant to keep the shift well defined on
// targets where unsigned long is 32 bits wide.
#define YUV_AARCH64_HWCAP2_SME2 (1ULL << 37)
// For AArch64, but public to allow testing on any CPU.
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
@ -210,6 +211,9 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
features |= kCpuHasSVE2;
if (hwcap2 & YUV_AARCH64_HWCAP2_SME) {
features |= kCpuHasSME;
if (hwcap2 & YUV_AARCH64_HWCAP2_SME2) {
features |= kCpuHasSME2;
}
}
}
}
@ -256,8 +260,11 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
features |= kCpuHasNeonDotProd;
if (have_feature("hw.optional.arm.FEAT_I8MM")) {
features |= kCpuHasNeonI8MM;
if (have_feature("hw.optional.arm.FEAT_SME2")) {
if (have_feature("hw.optional.arm.FEAT_SME")) {
features |= kCpuHasSME;
if (have_feature("hw.optional.arm.FEAT_SME2")) {
features |= kCpuHasSME2;
}
}
}
}

View File

@ -101,6 +101,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_sve = TestCpuFlag(kCpuHasSVE);
int has_sve2 = TestCpuFlag(kCpuHasSVE2);
int has_sme = TestCpuFlag(kCpuHasSME);
int has_sme2 = TestCpuFlag(kCpuHasSME2);
printf("Has Arm 0x%x\n", has_arm);
printf("Has Neon 0x%x\n", has_neon);
printf("Has Neon DotProd 0x%x\n", has_neon_dotprod);
@ -108,6 +109,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has SVE 0x%x\n", has_sve);
printf("Has SVE2 0x%x\n", has_sve2);
printf("Has SME 0x%x\n", has_sme);
printf("Has SME2 0x%x\n", has_sme2);
#if defined(__aarch64__)
// Read and print the SVE and SME vector lengths.
@ -381,6 +383,8 @@ TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
// Check for SME feature detection.
expected |= kCpuHasSME;
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU));
// TODO: Check for SME2 feature detection using hwcap values from an Apple M4.
}
#endif

View File

@ -61,6 +61,7 @@ int main(int argc, const char* argv[]) {
int has_sve = TestCpuFlag(kCpuHasSVE);
int has_sve2 = TestCpuFlag(kCpuHasSVE2);
int has_sme = TestCpuFlag(kCpuHasSME);
int has_sme2 = TestCpuFlag(kCpuHasSME2);
printf("Has Arm 0x%x\n", has_arm);
printf("Has Neon 0x%x\n", has_neon);
printf("Has Neon DotProd 0x%x\n", has_neon_dotprod);
@ -68,6 +69,7 @@ int main(int argc, const char* argv[]) {
printf("Has SVE 0x%x\n", has_sve);
printf("Has SVE2 0x%x\n", has_sve2);
printf("Has SME 0x%x\n", has_sme);
printf("Has SME2 0x%x\n", has_sme2);
#if __aarch64__
// Read and print the SVE and SME vector lengths.