[AArch64] Enable detection of additional architecture features

In particular, there are a few extensions that are of interest to us:

* FEAT_DotProd adds 4-way dot-product instructions which are useful in
  e.g. ARGBToY.

* FEAT_I8MM adds additional mixed-sign dot-product instructions which
  could be useful in e.g. ARGBToUV.

* FEAT_SVE and FEAT_SVE2 add support for the Scalable Vector Extension,
  which adds an array of new instructions including new widening loads
  and narrowing stores for dealing with mixed-width integer arithmetic
  efficiently and predication for avoiding the need for "any" cleanup
  loops.

This commit simply adds support for detecting the presence of these
features by extending the existing /proc/cpuinfo parsing, splitting it
into separate Arm and AArch64 functions for simplicity.

Since no bits are left free between the Arm and x86 entries in the CPU
flag bitset, we reuse some of the x86 bit values for the new AArch64
extensions. This doesn't seem obviously problematic, since the Arm and
x86 flags are each only valid on their own architecture, as long as we
avoid setting kCpuHasX86 (0x8) on Arm.

Bug: libyuv:973
Bug: libyuv:977
Change-Id: I8e256225fe12a4ba5da24460f54061e16eab6c57
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5378150
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
George Steed 2024-03-15 14:19:03 +00:00 committed by libyuv LUCI CQ
parent 4e8a843bfc
commit a038cda7b8
7 changed files with 108 additions and 20 deletions

View File

@ -21,9 +21,14 @@ extern "C" {
// Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
// These flags are only valid on Arm processors.
static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4;
// Leave a gap to avoid setting kCpuHasX86.
static const int kCpuHasNeonDotProd = 0x10;
static const int kCpuHasNeonI8MM = 0x20;
static const int kCpuHasSVE = 0x40;
static const int kCpuHasSVE2 = 0x80;
// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x8;
@ -83,6 +88,8 @@ static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int AArch64CpuCaps(const char* cpuinfo_name);
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int RiscvCpuCaps(const char* cpuinfo_name);

View File

@ -132,6 +132,13 @@ static int GetXCR0() {
#pragma optimize("g", on)
#endif
// Returns 1 if |needle| occurs in |cpuinfo_line| as a complete token, i.e. an
// occurrence that is immediately followed by a space or a newline, and 0
// otherwise. |needle_len| must be the length of |needle|. Callers pass the
// needle with a leading space so the start of the token is anchored too.
//
// Every occurrence is examined, not just the first one: a longer token that
// shares the needle as a prefix (e.g. "svebitperm" listed ahead of "sve")
// must not hide a later exact match.
static int cpuinfo_search(const char* cpuinfo_line,
                          const char* needle,
                          int needle_len) {
  const char* p = strstr(cpuinfo_line, needle);
  while (p) {
    const char next = p[needle_len];
    if (next == ' ' || next == '\n') {
      return 1;
    }
    if (*p == '\0') {
      // Only reachable with an empty needle matched at the terminator;
      // stepping further would read past the end of the line.
      break;
    }
    // Resume just past the start of the rejected occurrence.
    p = strstr(p + 1, needle);
  }
  return 0;
}
// Based on libvpx arm_cpudetect.c
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
@ -143,23 +150,48 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
return kCpuHasNEON;
}
memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
int features = 0;
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
char* p = strstr(cpuinfo_line, " neon");
if (p && (p[5] == ' ' || p[5] == '\n')) {
if (cpuinfo_search(cpuinfo_line, " neon", 5)) {
features |= kCpuHasNEON;
}
}
}
fclose(f);
return features;
}
// For AArch64, but public to allow testing on any CPU.
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512];
FILE* f = fopen(cpuinfo_name, "re");
if (!f) {
// Assume Neon if /proc/cpuinfo is unavailable.
// This will occur for Chrome sandbox for Pepper or Render process.
return kCpuHasNEON;
}
// aarch64 uses asimd for Neon.
p = strstr(cpuinfo_line, " asimd");
if (p) {
fclose(f);
return kCpuHasNEON;
memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
// Neon is mandatory on AArch64.
int features = kCpuHasNEON;
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) {
features |= kCpuHasNeonDotProd;
}
if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) {
features |= kCpuHasNeonI8MM;
}
if (cpuinfo_search(cpuinfo_line, " sve", 4)) {
features |= kCpuHasSVE;
}
if (cpuinfo_search(cpuinfo_line, " sve2", 5)) {
features |= kCpuHasSVE2;
}
}
}
fclose(f);
return 0;
return features;
}
LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
@ -346,7 +378,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
cpu_info = kCpuHasNEON;
cpu_info = AArch64CpuCaps("/proc/cpuinfo");
#else
// Linux arm parse text file for neon detect.
cpu_info = ArmCpuCaps("/proc/cpuinfo");

View File

@ -252,21 +252,20 @@ static int FileExists(const char* file_name) {
return 1;
}
TEST_F(LibYUVBaseTest, TestLinuxNeon) {
TEST_F(LibYUVBaseTest, TestLinuxArm) {
if (FileExists("../../unit_test/testdata/arm_v7.txt")) {
printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n");
EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt"));
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt"));
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt"));
} else {
printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n");
}
#if defined(__linux__) && defined(__ARM_NEON__)
#if defined(__linux__) && defined(__ARM_NEON__) && !defined(__aarch64__)
if (FileExists("/proc/cpuinfo")) {
if (kCpuHasNEON != ArmCpuCaps("/proc/cpuinfo")) {
// This can happen on ARM emulator but /proc/cpuinfo is from host.
printf("WARNING: Neon build enabled but CPU does not have NEON\n");
// This can happen on Arm emulator but /proc/cpuinfo is from host.
printf("WARNING: Neon build enabled but CPU does not have Neon\n");
}
} else {
printf("WARNING: unable to load \"/proc/cpuinfo\"\n");
@ -274,6 +273,24 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) {
#endif
}
// Runs AArch64CpuCaps() over captured cpuinfo samples from the test data
// directory. The juno sample is expected to report Neon only; the Cortex-A510,
// Cortex-A715 and Cortex-X3 samples are expected to additionally report
// DotProd, I8MM, SVE and SVE2. The checks are skipped with a warning when the
// juno sample cannot be found.
TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
  if (!FileExists("../../unit_test/testdata/juno.txt")) {
    printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n");
    return;
  }
  printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n");

  EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt"));

  // Feature set shared by all of the Armv9 samples below.
  const int v9_expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM |
                          kCpuHasSVE | kCpuHasSVE2;
  const char* const a510_path = "../../unit_test/testdata/cortex_a510.txt";
  const char* const a715_path = "../../unit_test/testdata/cortex_a715.txt";
  const char* const x3_path = "../../unit_test/testdata/cortex_x3.txt";
  EXPECT_EQ(v9_expected, AArch64CpuCaps(a510_path));
  EXPECT_EQ(v9_expected, AArch64CpuCaps(a715_path));
  EXPECT_EQ(v9_expected, AArch64CpuCaps(x3_path));
}
TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
if (FileExists("../../unit_test/testdata/mips.txt")) {
printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n");

8
unit_test/testdata/cortex_a510.txt vendored Normal file
View File

@ -0,0 +1,8 @@
processor : 0
BogoMIPS : 49.15
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x1
CPU part : 0xd46
CPU revision : 1

8
unit_test/testdata/cortex_a715.txt vendored Normal file
View File

@ -0,0 +1,8 @@
processor : 4
BogoMIPS : 49.15
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x1
CPU part : 0xd4d
CPU revision : 0

8
unit_test/testdata/cortex_x3.txt vendored Normal file
View File

@ -0,0 +1,8 @@
processor : 8
BogoMIPS : 49.15
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x1
CPU part : 0xd4e
CPU revision : 0

View File

@ -95,8 +95,16 @@ int main(int argc, const char* argv[]) {
printf("Cpu Flags 0x%x\n", cpu_flags);
if (has_arm) {
int has_neon = TestCpuFlag(kCpuHasNEON);
printf("Has ARM 0x%x\n", has_arm);
printf("Has NEON 0x%x\n", has_neon);
int has_neon_dotprod = TestCpuFlag(kCpuHasNeonDotProd);
int has_neon_i8mm = TestCpuFlag(kCpuHasNeonI8MM);
int has_sve = TestCpuFlag(kCpuHasSVE);
int has_sve2 = TestCpuFlag(kCpuHasSVE2);
printf("Has Arm 0x%x\n", has_arm);
printf("Has Neon 0x%x\n", has_neon);
printf("Has Neon DotProd 0x%x\n", has_neon_dotprod);
printf("Has Neon I8MM 0x%x\n", has_neon_i8mm);
printf("Has SVE 0x%x\n", has_sve);
printf("Has SVE2 0x%x\n", has_sve2);
}
if (has_riscv) {
int has_rvv = TestCpuFlag(kCpuHasRVV);