From a038cda7b8cc747209a48b14b219131356dfe50d Mon Sep 17 00:00:00 2001
From: George Steed
Date: Fri, 15 Mar 2024 14:19:03 +0000
Subject: [PATCH] [AArch64] Enable detection of additional architecture features

In particular there are a few extensions that are interesting for us:

* FEAT_DotProd adds 4-way dot-product instructions which are useful in
  e.g. ARGBToY.
* FEAT_I8MM adds additional mixed-sign dot-product instructions which
  could be useful in e.g. ARGBToUV.
* FEAT_SVE and FEAT_SVE2 add support for the Scalable Vector Extension,
  which adds an array of new instructions including new widening loads
  and narrowing stores for dealing with mixed-width integer arithmetic
  efficiently and predication for avoiding the need for "any" cleanup
  loops.

This commit simply adds support for detecting the presence of these
features by extending the existing /proc/cpuinfo parsing, splitting it
into separate Arm and AArch64 functions for simplicity.

Since we have no space left in the bitset entries between Arm and X86
entries, we reuse some of the X86 entries for new AArch64 extensions.
This doesn't seem obviously problematic as long as we avoid setting
kCpuHasX86.

Bug: libyuv:973 Bug: libyuv:977 Change-Id: I8e256225fe12a4ba5da24460f54061e16eab6c57 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5378150 Commit-Queue: Frank Barchard Reviewed-by: Frank Barchard --- include/libyuv/cpu_id.h | 9 ++++- source/cpu_id.cc | 56 +++++++++++++++++++++++------- unit_test/cpu_test.cc | 27 +++++++++++--- unit_test/testdata/cortex_a510.txt | 8 +++++ unit_test/testdata/cortex_a715.txt | 8 +++++ unit_test/testdata/cortex_x3.txt | 8 +++++ util/cpuid.c | 12 +++++-- 7 files changed, 108 insertions(+), 20 deletions(-) create mode 100644 unit_test/testdata/cortex_a510.txt create mode 100644 unit_test/testdata/cortex_a715.txt create mode 100644 unit_test/testdata/cortex_x3.txt diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 7a1f1c25b..14d26e479 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -21,9 +21,14 @@ extern "C" { // Internal flag to indicate cpuid requires initialization. static const int kCpuInitialized = 0x1; -// These flags are only valid on ARM processors. +// These flags are only valid on Arm processors. static const int kCpuHasARM = 0x2; static const int kCpuHasNEON = 0x4; +// Leave a gap to avoid setting kCpuHasX86. +static const int kCpuHasNeonDotProd = 0x10; +static const int kCpuHasNeonI8MM = 0x20; +static const int kCpuHasSVE = 0x40; +static const int kCpuHasSVE2 = 0x80; // These flags are only valid on x86 processors. 
static const int kCpuHasX86 = 0x8; @@ -83,6 +88,8 @@ static __inline int TestCpuFlag(int test_flag) { LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); LIBYUV_API +int AArch64CpuCaps(const char* cpuinfo_name); +LIBYUV_API int MipsCpuCaps(const char* cpuinfo_name); LIBYUV_API int RiscvCpuCaps(const char* cpuinfo_name); diff --git a/source/cpu_id.cc b/source/cpu_id.cc index ad4246f55..221187dc2 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -132,6 +132,13 @@ static int GetXCR0() { #pragma optimize("g", on) #endif +static int cpuinfo_search(const char* cpuinfo_line, + const char* needle, + int needle_len) { + const char* p = strstr(cpuinfo_line, needle); + return p && (p[needle_len] == ' ' || p[needle_len] == '\n'); +} + // Based on libvpx arm_cpudetect.c // For Arm, but public to allow testing on any CPU LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { @@ -143,23 +150,48 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { return kCpuHasNEON; } memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); + int features = 0; while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { if (memcmp(cpuinfo_line, "Features", 8) == 0) { - char* p = strstr(cpuinfo_line, " neon"); - if (p && (p[5] == ' ' || p[5] == '\n')) { - fclose(f); - return kCpuHasNEON; - } - // aarch64 uses asimd for Neon. - p = strstr(cpuinfo_line, " asimd"); - if (p) { - fclose(f); - return kCpuHasNEON; + if (cpuinfo_search(cpuinfo_line, " neon", 5)) { + features |= kCpuHasNEON; } } } fclose(f); - return 0; + return features; +} + +// For AArch64, but public to allow testing on any CPU. +LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) { + char cpuinfo_line[512]; + FILE* f = fopen(cpuinfo_name, "re"); + if (!f) { + // Assume Neon if /proc/cpuinfo is unavailable. + // This will occur for Chrome sandbox for Pepper or Render process. + return kCpuHasNEON; + } + memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); + // Neon is mandatory on AArch64. 
+ int features = kCpuHasNEON; + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { + if (memcmp(cpuinfo_line, "Features", 8) == 0) { + if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) { + features |= kCpuHasNeonDotProd; + } + if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) { + features |= kCpuHasNeonI8MM; + } + if (cpuinfo_search(cpuinfo_line, " sve", 4)) { + features |= kCpuHasSVE; + } + if (cpuinfo_search(cpuinfo_line, " sve2", 5)) { + features |= kCpuHasSVE2; + } + } + } + fclose(f); + return features; } LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) { @@ -346,7 +378,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { // So for aarch64, neon enabling is hard coded here. #endif #if defined(__aarch64__) - cpu_info = kCpuHasNEON; + cpu_info = AArch64CpuCaps("/proc/cpuinfo"); #else // Linux arm parse text file for neon detect. cpu_info = ArmCpuCaps("/proc/cpuinfo"); diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 6e0fdef1b..b620e806e 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -252,21 +252,20 @@ static int FileExists(const char* file_name) { return 1; } -TEST_F(LibYUVBaseTest, TestLinuxNeon) { +TEST_F(LibYUVBaseTest, TestLinuxArm) { if (FileExists("../../unit_test/testdata/arm_v7.txt")) { printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n"); EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt")); EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt")); - EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt")); } else { printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n"); } -#if defined(__linux__) && defined(__ARM_NEON__) +#if defined(__linux__) && defined(__ARM_NEON__) && !defined(__aarch64__) if (FileExists("/proc/cpuinfo")) { if (kCpuHasNEON != ArmCpuCaps("/proc/cpuinfo")) { - // This can happen on ARM emulator but /proc/cpuinfo is from host. 
- printf("WARNING: Neon build enabled but CPU does not have NEON\n"); + // This can happen on Arm emulator but /proc/cpuinfo is from host. + printf("WARNING: Neon build enabled but CPU does not have Neon\n"); } } else { printf("WARNING: unable to load \"/proc/cpuinfo\"\n"); @@ -274,6 +273,24 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) { #endif } +TEST_F(LibYUVBaseTest, TestLinuxAArch64) { + if (FileExists("../../unit_test/testdata/juno.txt")) { + printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n"); + + EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt")); + int v9_expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | + kCpuHasSVE | kCpuHasSVE2; + EXPECT_EQ(v9_expected, + AArch64CpuCaps("../../unit_test/testdata/cortex_a510.txt")); + EXPECT_EQ(v9_expected, + AArch64CpuCaps("../../unit_test/testdata/cortex_a715.txt")); + EXPECT_EQ(v9_expected, + AArch64CpuCaps("../../unit_test/testdata/cortex_x3.txt")); + } else { + printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n"); + } +} + TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) { if (FileExists("../../unit_test/testdata/mips.txt")) { printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n"); diff --git a/unit_test/testdata/cortex_a510.txt b/unit_test/testdata/cortex_a510.txt new file mode 100644 index 000000000..8c2aeb4c3 --- /dev/null +++ b/unit_test/testdata/cortex_a510.txt @@ -0,0 +1,8 @@ +processor : 0 +BogoMIPS : 49.15 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd46 +CPU revision : 1 diff --git a/unit_test/testdata/cortex_a715.txt b/unit_test/testdata/cortex_a715.txt new file mode 100644 index 000000000..6d1f33afa --- 
/dev/null +++ b/unit_test/testdata/cortex_a715.txt @@ -0,0 +1,8 @@ +processor : 4 +BogoMIPS : 49.15 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd4d +CPU revision : 0 diff --git a/unit_test/testdata/cortex_x3.txt b/unit_test/testdata/cortex_x3.txt new file mode 100644 index 000000000..a63db856d --- /dev/null +++ b/unit_test/testdata/cortex_x3.txt @@ -0,0 +1,8 @@ +processor : 8 +BogoMIPS : 49.15 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd4e +CPU revision : 0 diff --git a/util/cpuid.c b/util/cpuid.c index b0b85f532..c070946f7 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -95,8 +95,16 @@ int main(int argc, const char* argv[]) { printf("Cpu Flags 0x%x\n", cpu_flags); if (has_arm) { int has_neon = TestCpuFlag(kCpuHasNEON); - printf("Has ARM 0x%x\n", has_arm); - printf("Has NEON 0x%x\n", has_neon); + int has_neon_dotprod = TestCpuFlag(kCpuHasNeonDotProd); + int has_neon_i8mm = TestCpuFlag(kCpuHasNeonI8MM); + int has_sve = TestCpuFlag(kCpuHasSVE); + int has_sve2 = TestCpuFlag(kCpuHasSVE2); + printf("Has Arm 0x%x\n", has_arm); + printf("Has Neon 0x%x\n", has_neon); + printf("Has Neon DotProd 0x%x\n", has_neon_dotprod); + printf("Has Neon I8MM 0x%x\n", has_neon_i8mm); + printf("Has SVE 0x%x\n", has_sve); + printf("Has SVE2 0x%x\n", has_sve2); } if (has_riscv) { int has_rvv = TestCpuFlag(kCpuHasRVV);