From f5882ed1c5a843b62ad78fff01c0b77e729c9b62 Mon Sep 17 00:00:00 2001 From: George Steed Date: Fri, 15 Mar 2024 17:43:09 +0000 Subject: [PATCH] [AArch64] getauxval(AT_HWCAP{,2}) feature detection, attempt #2 This re-lands commit ba0bba5b2b7e38c9365a5d152b4efa0458863213. Now with additional #ifdef __linux__ guards to avoid compiling Linux-specific code on non-Linux platforms. Non-Linux feature detection will be added in a separate patch. Using getauxval(AT_HWCAP{,2}) has the advantage of also working under emulation where faking /proc/cpuinfo is not supported. For the Chromium sandbox, getauxval is supported since Android API version 18. The minimum supported API version at time of writing is 21, so we should be able to use getauxval unconditionally. On the off chance the call fails, it will return 0 and we will correctly fall back to using only Neon. If we want to read the current CPU implementer or part number we could do this by checking HWCAP_CPUID and then reading MIDR_EL1. This will cause a kernel trap to emulate the EL1 read but should still be a lot faster than reading the whole of /proc/cpuinfo. 
Bug: libyuv:980 Change-Id: I8ae103ea7e32ef44db72f3c9896417bfe97ff5c5 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5465590 Reviewed-by: Frank Barchard --- include/libyuv/cpu_id.h | 11 +++- source/cpu_id.cc | 88 +++++++++++++++++------------- unit_test/cpu_test.cc | 29 +++++----- unit_test/testdata/cortex_a510.txt | 8 --- unit_test/testdata/cortex_a715.txt | 8 --- unit_test/testdata/cortex_x3.txt | 8 --- unit_test/testdata/juno.txt | 15 ----- 7 files changed, 75 insertions(+), 92 deletions(-) delete mode 100644 unit_test/testdata/cortex_a510.txt delete mode 100644 unit_test/testdata/cortex_a715.txt delete mode 100644 unit_test/testdata/cortex_x3.txt delete mode 100644 unit_test/testdata/juno.txt diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 14d26e479..af8e0f5e0 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -88,12 +88,19 @@ static __inline int TestCpuFlag(int test_flag) { LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); LIBYUV_API -int AArch64CpuCaps(const char* cpuinfo_name); -LIBYUV_API int MipsCpuCaps(const char* cpuinfo_name); LIBYUV_API int RiscvCpuCaps(const char* cpuinfo_name); +#if __linux__ +// On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}). +LIBYUV_API +int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2); +#else +LIBYUV_API +int AArch64CpuCaps(); +#endif + // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // MaskCpuFlags(-1) to enable all cpu specific optimizations. 
diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 221187dc2..5d192fbe5 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -23,6 +23,10 @@ #include <stdio.h> // For fopen() #include <string.h> +#ifdef __linux__ +#include <sys/auxv.h> // For getauxval() +#endif + #ifdef __cplusplus namespace libyuv { extern "C" { @@ -162,37 +166,45 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { return features; } +#ifdef __linux__ +// Define hwcap values ourselves: building with an old auxv header where these +// hwcap values are not defined should not prevent features from being enabled. +#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20) +#define YUV_AARCH64_HWCAP_SVE (1 << 22) +#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1) +#define YUV_AARCH64_HWCAP2_I8MM (1 << 13) + // For AArch64, but public to allow testing on any CPU. -LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) { - char cpuinfo_line[512]; - FILE* f = fopen(cpuinfo_name, "re"); - if (!f) { - // Assume Neon if /proc/cpuinfo is unavailable. - // This will occur for Chrome sandbox for Pepper or Render process. - return kCpuHasNEON; - } - memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); - // Neon is mandatory on AArch64. +LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, + unsigned long hwcap2) { + // Neon is mandatory on AArch64, so enable regardless of hwcaps. 
int features = kCpuHasNEON; - while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { - if (memcmp(cpuinfo_line, "Features", 8) == 0) { - if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) { - features |= kCpuHasNeonDotProd; - } - if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) { - features |= kCpuHasNeonI8MM; - } - if (cpuinfo_search(cpuinfo_line, " sve", 4)) { - features |= kCpuHasSVE; - } - if (cpuinfo_search(cpuinfo_line, " sve2", 5)) { - features |= kCpuHasSVE2; - } - } + + if (hwcap & YUV_AARCH64_HWCAP_ASIMDDP) { + features |= kCpuHasNeonDotProd; + } + if (hwcap2 & YUV_AARCH64_HWCAP2_I8MM) { + features |= kCpuHasNeonI8MM; + } + if (hwcap & YUV_AARCH64_HWCAP_SVE) { + features |= kCpuHasSVE; + } + if (hwcap2 & YUV_AARCH64_HWCAP2_SVE2) { + features |= kCpuHasSVE2; } - fclose(f); return features; } +#else // !defined(__linux__) +// For AArch64, but public to allow testing on any CPU. +LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { + // Neon is mandatory on AArch64, so enable unconditionally. + int features = kCpuHasNEON; + + // TODO(libyuv:980) support feature detection on non-Linux platforms. + + return features; +} +#endif LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) { char cpuinfo_line[512]; @@ -368,18 +380,20 @@ static SAFEBUFFERS int GetCpuFlags(void) { cpu_info |= kCpuHasLOONGARCH; #endif #if defined(__arm__) || defined(__aarch64__) -// gcc -mfpu=neon defines __ARM_NEON__ -// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. -// For Linux, /proc/cpuinfo can be tested but without that assume Neon. -#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__) - cpu_info = kCpuHasNEON; -// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon -// flag in it. -// So for aarch64, neon enabling is hard coded here. 
-#endif -#if defined(__aarch64__) - cpu_info = AArch64CpuCaps("/proc/cpuinfo"); +#if defined(__aarch64__) && defined(__linux__) + // getauxval is supported since Android SDK version 18, minimum at time of + // writing is 21, so should be safe to always use this. If getauxval is + // somehow disabled then getauxval returns 0, which will leave Neon enabled + // since Neon is mandatory on AArch64. + unsigned long hwcap = getauxval(AT_HWCAP); + unsigned long hwcap2 = getauxval(AT_HWCAP2); + cpu_info = AArch64CpuCaps(hwcap, hwcap2); +#elif defined(__aarch64__) + cpu_info = AArch64CpuCaps(); #else + // gcc -mfpu=neon defines __ARM_NEON__ + // __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. + // For Linux, /proc/cpuinfo can be tested but without that assume Neon. // Linux arm parse text file for neon detect. cpu_info = ArmCpuCaps("/proc/cpuinfo"); #endif diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index b620e806e..38406c9a7 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -273,23 +273,24 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) { #endif } +#ifdef __linux__ TEST_F(LibYUVBaseTest, TestLinuxAArch64) { - if (FileExists("../../unit_test/testdata/juno.txt")) { - printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n"); + // Values taken from a Cortex-A57 machine, only Neon available. + EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U)); - EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt")); - int v9_expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | - kCpuHasSVE | kCpuHasSVE2; - EXPECT_EQ(v9_expected, - AArch64CpuCaps("../../unit_test/testdata/cortex_a510.txt")); - EXPECT_EQ(v9_expected, - AArch64CpuCaps("../../unit_test/testdata/cortex_a715.txt")); - EXPECT_EQ(v9_expected, - AArch64CpuCaps("../../unit_test/testdata/cortex_x3.txt")); - } else { - printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n"); - } + // Values taken from a Google Pixel 7. 
+ int expected = kCpuHasNEON | kCpuHasNeonDotProd; + EXPECT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U)); + + // Values taken from a Google Pixel 8. + expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | kCpuHasSVE | + kCpuHasSVE2; + EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU)); + + // Values taken from a Neoverse N2 machine. + EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU)); } +#endif TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) { if (FileExists("../../unit_test/testdata/mips.txt")) { diff --git a/unit_test/testdata/cortex_a510.txt b/unit_test/testdata/cortex_a510.txt deleted file mode 100644 index 8c2aeb4c3..000000000 --- a/unit_test/testdata/cortex_a510.txt +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 49.15 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd46 -CPU revision : 1 diff --git a/unit_test/testdata/cortex_a715.txt b/unit_test/testdata/cortex_a715.txt deleted file mode 100644 index 6d1f33afa..000000000 --- a/unit_test/testdata/cortex_a715.txt +++ /dev/null @@ -1,8 +0,0 @@ -processor : 4 -BogoMIPS : 49.15 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd4d -CPU revision : 0 diff --git a/unit_test/testdata/cortex_x3.txt b/unit_test/testdata/cortex_x3.txt deleted file mode 100644 index a63db856d..000000000 --- a/unit_test/testdata/cortex_x3.txt +++ /dev/null @@ -1,8 +0,0 @@ -processor : 8 
-BogoMIPS : 49.15 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd4e -CPU revision : 0 diff --git a/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt deleted file mode 100644 index dd465272b..000000000 --- a/unit_test/testdata/juno.txt +++ /dev/null @@ -1,15 +0,0 @@ -Processor : AArch64 Processor rev 0 (aarch64) -processor : 0 -processor : 1 -processor : 2 -processor : 3 -processor : 4 -processor : 5 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 -CPU implementer : 0x41 -CPU architecture: AArch64 -CPU variant : 0x0 -CPU part : 0xd07 -CPU revision : 0 - -Hardware : Juno