mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
[AArch64] Enable detection of additional architecture features
In particular there are a few extensions that are interesting for us:
* FEAT_DotProd adds 4-way dot-product instructions which are useful in e.g. ARGBToY.
* FEAT_I8MM adds additional mixed-sign dot-product instructions which could be useful in e.g. ARGBToUV.
* FEAT_SVE and FEAT_SVE2 add support for the Scalable Vector Extension, which adds an array of new instructions including new widening loads and narrowing stores for dealing with mixed-width integer arithmetic efficiently and predication for avoiding the need for "any" cleanup loops.

This commit simply adds support for detecting the presence of these features by extending the existing /proc/cpuinfo parsing, splitting it into separate Arm and AArch64 functions for simplicity.

Since we have no space left in the bitset entries between Arm and X86 entries, we reuse some of the X86 entries for new AArch64 extensions. This doesn't seem obviously problematic as long as we avoid setting kCpuHasX86.

Bug: libyuv:973
Bug: libyuv:977
Change-Id: I8e256225fe12a4ba5da24460f54061e16eab6c57
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5378150
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
4e8a843bfc
commit
a038cda7b8
@ -21,9 +21,14 @@ extern "C" {
|
||||
// Internal flag to indicate cpuid requires initialization.
|
||||
static const int kCpuInitialized = 0x1;
|
||||
|
||||
// These flags are only valid on ARM processors.
|
||||
// These flags are only valid on Arm processors.
|
||||
static const int kCpuHasARM = 0x2;
|
||||
static const int kCpuHasNEON = 0x4;
|
||||
// Leave a gap to avoid setting kCpuHasX86.
|
||||
static const int kCpuHasNeonDotProd = 0x10;
|
||||
static const int kCpuHasNeonI8MM = 0x20;
|
||||
static const int kCpuHasSVE = 0x40;
|
||||
static const int kCpuHasSVE2 = 0x80;
|
||||
|
||||
// These flags are only valid on x86 processors.
|
||||
static const int kCpuHasX86 = 0x8;
|
||||
@ -83,6 +88,8 @@ static __inline int TestCpuFlag(int test_flag) {
|
||||
LIBYUV_API
|
||||
int ArmCpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int AArch64CpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int MipsCpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int RiscvCpuCaps(const char* cpuinfo_name);
|
||||
|
||||
@ -132,6 +132,13 @@ static int GetXCR0() {
|
||||
#pragma optimize("g", on)
|
||||
#endif
|
||||
|
||||
// Returns 1 if |needle| appears in |cpuinfo_line| as a complete token, i.e.
// some occurrence of |needle| is immediately followed by a space or a
// newline; returns 0 otherwise.
//
// cpuinfo_line: NUL-terminated line of text, e.g. a "Features" line.
// needle:       text to look for; callers include the leading space
//               (" sve") so that the match also starts on a token boundary.
// needle_len:   length of |needle| in characters.
//
// Every occurrence is examined, not just the first one, so a shorter token
// is still found when a longer token sharing its prefix appears earlier on
// the line (e.g. " sve" in "... sve2 sve\n").
static int cpuinfo_search(const char* cpuinfo_line,
                          const char* needle,
                          int needle_len) {
  const char* p = strstr(cpuinfo_line, needle);
  while (p) {
    const char next = p[needle_len];
    if (next == ' ' || next == '\n') {
      return 1;
    }
    if (*p == '\0') {
      // Only reachable with an empty needle matched at the end of the line;
      // stop rather than step past the terminator.
      break;
    }
    // This hit was only a prefix of a longer token; keep looking.
    p = strstr(p + 1, needle);
  }
  return 0;
}
|
||||
|
||||
// Based on libvpx arm_cpudetect.c
|
||||
// For Arm, but public to allow testing on any CPU
|
||||
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
@ -143,23 +150,48 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
return kCpuHasNEON;
|
||||
}
|
||||
memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
|
||||
int features = 0;
|
||||
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
|
||||
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
|
||||
char* p = strstr(cpuinfo_line, " neon");
|
||||
if (p && (p[5] == ' ' || p[5] == '\n')) {
|
||||
if (cpuinfo_search(cpuinfo_line, " neon", 5)) {
|
||||
features |= kCpuHasNEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
return features;
|
||||
}
|
||||
|
||||
// For AArch64, but public to allow testing on any CPU.
|
||||
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) {
|
||||
char cpuinfo_line[512];
|
||||
FILE* f = fopen(cpuinfo_name, "re");
|
||||
if (!f) {
|
||||
// Assume Neon if /proc/cpuinfo is unavailable.
|
||||
// This will occur for Chrome sandbox for Pepper or Render process.
|
||||
return kCpuHasNEON;
|
||||
}
|
||||
// aarch64 uses asimd for Neon.
|
||||
p = strstr(cpuinfo_line, " asimd");
|
||||
if (p) {
|
||||
fclose(f);
|
||||
return kCpuHasNEON;
|
||||
memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
|
||||
// Neon is mandatory on AArch64.
|
||||
int features = kCpuHasNEON;
|
||||
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
|
||||
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
|
||||
if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) {
|
||||
features |= kCpuHasNeonDotProd;
|
||||
}
|
||||
if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) {
|
||||
features |= kCpuHasNeonI8MM;
|
||||
}
|
||||
if (cpuinfo_search(cpuinfo_line, " sve", 4)) {
|
||||
features |= kCpuHasSVE;
|
||||
}
|
||||
if (cpuinfo_search(cpuinfo_line, " sve2", 5)) {
|
||||
features |= kCpuHasSVE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
return 0;
|
||||
return features;
|
||||
}
|
||||
|
||||
LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
|
||||
@ -346,7 +378,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
|
||||
// So for aarch64, neon enabling is hard coded here.
|
||||
#endif
|
||||
#if defined(__aarch64__)
|
||||
cpu_info = kCpuHasNEON;
|
||||
cpu_info = AArch64CpuCaps("/proc/cpuinfo");
|
||||
#else
|
||||
// Linux arm parse text file for neon detect.
|
||||
cpu_info = ArmCpuCaps("/proc/cpuinfo");
|
||||
|
||||
@ -252,21 +252,20 @@ static int FileExists(const char* file_name) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
TEST_F(LibYUVBaseTest, TestLinuxNeon) {
|
||||
TEST_F(LibYUVBaseTest, TestLinuxArm) {
|
||||
if (FileExists("../../unit_test/testdata/arm_v7.txt")) {
|
||||
printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n");
|
||||
|
||||
EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt"));
|
||||
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt"));
|
||||
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/juno.txt"));
|
||||
} else {
|
||||
printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n");
|
||||
}
|
||||
#if defined(__linux__) && defined(__ARM_NEON__)
|
||||
#if defined(__linux__) && defined(__ARM_NEON__) && !defined(__aarch64__)
|
||||
if (FileExists("/proc/cpuinfo")) {
|
||||
if (kCpuHasNEON != ArmCpuCaps("/proc/cpuinfo")) {
|
||||
// This can happen on ARM emulator but /proc/cpuinfo is from host.
|
||||
printf("WARNING: Neon build enabled but CPU does not have NEON\n");
|
||||
// This can happen on Arm emulator but /proc/cpuinfo is from host.
|
||||
printf("WARNING: Neon build enabled but CPU does not have Neon\n");
|
||||
}
|
||||
} else {
|
||||
printf("WARNING: unable to load \"/proc/cpuinfo\"\n");
|
||||
@ -274,6 +273,24 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Runs AArch64CpuCaps over canned cpuinfo dumps from unit_test/testdata.
// juno.txt is expected to report Neon only; the three Cortex dumps are
// expected to report Neon, DotProd, I8MM, SVE and SVE2. The checks are
// skipped with a warning when juno.txt cannot be found.
TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
  if (!FileExists("../../unit_test/testdata/juno.txt")) {
    printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n");
    return;
  }
  printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n");

  EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt"));

  // Flag set shared by all of the Armv9 sample files below.
  int v9_expected = kCpuHasNEON;
  v9_expected |= kCpuHasNeonDotProd;
  v9_expected |= kCpuHasNeonI8MM;
  v9_expected |= kCpuHasSVE;
  v9_expected |= kCpuHasSVE2;

  EXPECT_EQ(v9_expected,
            AArch64CpuCaps("../../unit_test/testdata/cortex_a510.txt"));
  EXPECT_EQ(v9_expected,
            AArch64CpuCaps("../../unit_test/testdata/cortex_a715.txt"));
  EXPECT_EQ(v9_expected,
            AArch64CpuCaps("../../unit_test/testdata/cortex_x3.txt"));
}
|
||||
|
||||
TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
|
||||
if (FileExists("../../unit_test/testdata/mips.txt")) {
|
||||
printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n");
|
||||
|
||||
8
unit_test/testdata/cortex_a510.txt
vendored
Normal file
8
unit_test/testdata/cortex_a510.txt
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
processor : 0
|
||||
BogoMIPS : 49.15
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x1
|
||||
CPU part : 0xd46
|
||||
CPU revision : 1
|
||||
8
unit_test/testdata/cortex_a715.txt
vendored
Normal file
8
unit_test/testdata/cortex_a715.txt
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
processor : 4
|
||||
BogoMIPS : 49.15
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x1
|
||||
CPU part : 0xd4d
|
||||
CPU revision : 0
|
||||
8
unit_test/testdata/cortex_x3.txt
vendored
Normal file
8
unit_test/testdata/cortex_x3.txt
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
processor : 8
|
||||
BogoMIPS : 49.15
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x1
|
||||
CPU part : 0xd4e
|
||||
CPU revision : 0
|
||||
12
util/cpuid.c
12
util/cpuid.c
@ -95,8 +95,16 @@ int main(int argc, const char* argv[]) {
|
||||
printf("Cpu Flags 0x%x\n", cpu_flags);
|
||||
if (has_arm) {
|
||||
int has_neon = TestCpuFlag(kCpuHasNEON);
|
||||
printf("Has ARM 0x%x\n", has_arm);
|
||||
printf("Has NEON 0x%x\n", has_neon);
|
||||
int has_neon_dotprod = TestCpuFlag(kCpuHasNeonDotProd);
|
||||
int has_neon_i8mm = TestCpuFlag(kCpuHasNeonI8MM);
|
||||
int has_sve = TestCpuFlag(kCpuHasSVE);
|
||||
int has_sve2 = TestCpuFlag(kCpuHasSVE2);
|
||||
printf("Has Arm 0x%x\n", has_arm);
|
||||
printf("Has Neon 0x%x\n", has_neon);
|
||||
printf("Has Neon DotProd 0x%x\n", has_neon_dotprod);
|
||||
printf("Has Neon I8MM 0x%x\n", has_neon_i8mm);
|
||||
printf("Has SVE 0x%x\n", has_sve);
|
||||
printf("Has SVE2 0x%x\n", has_sve2);
|
||||
}
|
||||
if (has_riscv) {
|
||||
int has_rvv = TestCpuFlag(kCpuHasRVV);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user