mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
[AArch64] getauxval(AT_HWCAP{,2}) feature detection, attempt #2
This re-lands commit ba0bba5b2b7e38c9365a5d152b4efa0458863213.
Now with additional #ifdef __linux__ guards to avoid compiling
Linux-specific code on non-Linux platforms. Non-Linux feature detection
will be added in a separate patch.
Using getauxval(AT_HWCAP{,2}) has the advantage of also working under
emulation where faking /proc/cpuinfo is not supported.
For the Chromium sandbox, getauxval has been supported since Android API
version 18. The minimum supported API version at time of writing is 21, so
we should be able to use getauxval unconditionally. On the off-chance the
call fails, it will return 0 and we will correctly fall back to using only
Neon.
If we want to read the current CPU implementer or part number we could
do this by checking HWCAP_CPUID and then reading MIDR_EL1. This will
cause a kernel trap to emulate the EL1 read but should still be a lot
faster than reading the whole of /proc/cpuinfo.
Bug: libyuv:980
Change-Id: I8ae103ea7e32ef44db72f3c9896417bfe97ff5c5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5465590
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
356232b687
commit
f5882ed1c5
@ -88,12 +88,19 @@ static __inline int TestCpuFlag(int test_flag) {
|
||||
LIBYUV_API
|
||||
int ArmCpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int AArch64CpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int MipsCpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int RiscvCpuCaps(const char* cpuinfo_name);
|
||||
|
||||
#if __linux__
|
||||
// On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}).
|
||||
LIBYUV_API
|
||||
int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2);
|
||||
#else
|
||||
LIBYUV_API
|
||||
int AArch64CpuCaps();
|
||||
#endif
|
||||
|
||||
// For testing, allow CPU flags to be disabled.
|
||||
// e.g. MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
|
||||
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
|
||||
|
||||
@ -23,6 +23,10 @@
|
||||
#include <stdio.h> // For fopen()
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/auxv.h> // For getauxval()
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
@ -162,37 +166,45 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
return features;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
// Define hwcap values ourselves: building with an old auxv header where these
|
||||
// hwcap values are not defined should not prevent features from being enabled.
|
||||
#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20)
|
||||
#define YUV_AARCH64_HWCAP_SVE (1 << 22)
|
||||
#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1)
|
||||
#define YUV_AARCH64_HWCAP2_I8MM (1 << 13)
|
||||
|
||||
// For AArch64, but public to allow testing on any CPU.
|
||||
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) {
|
||||
char cpuinfo_line[512];
|
||||
FILE* f = fopen(cpuinfo_name, "re");
|
||||
if (!f) {
|
||||
// Assume Neon if /proc/cpuinfo is unavailable.
|
||||
// This will occur for Chrome sandbox for Pepper or Render process.
|
||||
return kCpuHasNEON;
|
||||
}
|
||||
memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
|
||||
// Neon is mandatory on AArch64.
|
||||
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
|
||||
unsigned long hwcap2) {
|
||||
// Neon is mandatory on AArch64, so enable regardless of hwcaps.
|
||||
int features = kCpuHasNEON;
|
||||
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
|
||||
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
|
||||
if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) {
|
||||
features |= kCpuHasNeonDotProd;
|
||||
}
|
||||
if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) {
|
||||
features |= kCpuHasNeonI8MM;
|
||||
}
|
||||
if (cpuinfo_search(cpuinfo_line, " sve", 4)) {
|
||||
features |= kCpuHasSVE;
|
||||
}
|
||||
if (cpuinfo_search(cpuinfo_line, " sve2", 5)) {
|
||||
features |= kCpuHasSVE2;
|
||||
}
|
||||
}
|
||||
|
||||
if (hwcap & YUV_AARCH64_HWCAP_ASIMDDP) {
|
||||
features |= kCpuHasNeonDotProd;
|
||||
}
|
||||
if (hwcap2 & YUV_AARCH64_HWCAP2_I8MM) {
|
||||
features |= kCpuHasNeonI8MM;
|
||||
}
|
||||
if (hwcap & YUV_AARCH64_HWCAP_SVE) {
|
||||
features |= kCpuHasSVE;
|
||||
}
|
||||
if (hwcap2 & YUV_AARCH64_HWCAP2_SVE2) {
|
||||
features |= kCpuHasSVE2;
|
||||
}
|
||||
fclose(f);
|
||||
return features;
|
||||
}
|
||||
#else // !defined(__linux__)
|
||||
// For AArch64, but public to allow testing on any CPU.
|
||||
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
|
||||
// Neon is mandatory on AArch64, so enable unconditionally.
|
||||
int features = kCpuHasNEON;
|
||||
|
||||
// TODO(libyuv:980) support feature detection on non-Linux platforms.
|
||||
|
||||
return features;
|
||||
}
|
||||
#endif
|
||||
|
||||
LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
|
||||
char cpuinfo_line[512];
|
||||
@ -368,18 +380,20 @@ static SAFEBUFFERS int GetCpuFlags(void) {
|
||||
cpu_info |= kCpuHasLOONGARCH;
|
||||
#endif
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// gcc -mfpu=neon defines __ARM_NEON__
|
||||
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
|
||||
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
|
||||
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
|
||||
cpu_info = kCpuHasNEON;
|
||||
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
|
||||
// flag in it.
|
||||
// So for aarch64, neon enabling is hard coded here.
|
||||
#endif
|
||||
#if defined(__aarch64__)
|
||||
cpu_info = AArch64CpuCaps("/proc/cpuinfo");
|
||||
#if defined(__aarch64__) && defined(__linux__)
|
||||
// getauxval is supported since Android SDK version 18, minimum at time of
|
||||
// writing is 21, so should be safe to always use this. If getauxval is
|
||||
// somehow disabled then getauxval returns 0, which will leave Neon enabled
|
||||
// since Neon is mandatory on AArch64.
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
unsigned long hwcap2 = getauxval(AT_HWCAP2);
|
||||
cpu_info = AArch64CpuCaps(hwcap, hwcap2);
|
||||
#elif defined(__aarch64__)
|
||||
cpu_info = AArch64CpuCaps();
|
||||
#else
|
||||
// gcc -mfpu=neon defines __ARM_NEON__
|
||||
// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
|
||||
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
|
||||
// Linux arm parse text file for neon detect.
|
||||
cpu_info = ArmCpuCaps("/proc/cpuinfo");
|
||||
#endif
|
||||
|
||||
@ -273,23 +273,24 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
|
||||
if (FileExists("../../unit_test/testdata/juno.txt")) {
|
||||
printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n");
|
||||
// Values taken from a Cortex-A57 machine, only Neon available.
|
||||
EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U));
|
||||
|
||||
EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt"));
|
||||
int v9_expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM |
|
||||
kCpuHasSVE | kCpuHasSVE2;
|
||||
EXPECT_EQ(v9_expected,
|
||||
AArch64CpuCaps("../../unit_test/testdata/cortex_a510.txt"));
|
||||
EXPECT_EQ(v9_expected,
|
||||
AArch64CpuCaps("../../unit_test/testdata/cortex_a715.txt"));
|
||||
EXPECT_EQ(v9_expected,
|
||||
AArch64CpuCaps("../../unit_test/testdata/cortex_x3.txt"));
|
||||
} else {
|
||||
printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n");
|
||||
}
|
||||
// Values taken from a Google Pixel 7.
|
||||
int expected = kCpuHasNEON | kCpuHasNeonDotProd;
|
||||
EXPECT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U));
|
||||
|
||||
// Values taken from a Google Pixel 8.
|
||||
expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | kCpuHasSVE |
|
||||
kCpuHasSVE2;
|
||||
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU));
|
||||
|
||||
// Values taken from a Neoverse N2 machine.
|
||||
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU));
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
|
||||
if (FileExists("../../unit_test/testdata/mips.txt")) {
|
||||
|
||||
8
unit_test/testdata/cortex_a510.txt
vendored
8
unit_test/testdata/cortex_a510.txt
vendored
@ -1,8 +0,0 @@
|
||||
processor : 0
|
||||
BogoMIPS : 49.15
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x1
|
||||
CPU part : 0xd46
|
||||
CPU revision : 1
|
||||
8
unit_test/testdata/cortex_a715.txt
vendored
8
unit_test/testdata/cortex_a715.txt
vendored
@ -1,8 +0,0 @@
|
||||
processor : 4
|
||||
BogoMIPS : 49.15
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x1
|
||||
CPU part : 0xd4d
|
||||
CPU revision : 0
|
||||
8
unit_test/testdata/cortex_x3.txt
vendored
8
unit_test/testdata/cortex_x3.txt
vendored
@ -1,8 +0,0 @@
|
||||
processor : 8
|
||||
BogoMIPS : 49.15
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x1
|
||||
CPU part : 0xd4e
|
||||
CPU revision : 0
|
||||
15
unit_test/testdata/juno.txt
vendored
15
unit_test/testdata/juno.txt
vendored
@ -1,15 +0,0 @@
|
||||
Processor : AArch64 Processor rev 0 (aarch64)
|
||||
processor : 0
|
||||
processor : 1
|
||||
processor : 2
|
||||
processor : 3
|
||||
processor : 4
|
||||
processor : 5
|
||||
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: AArch64
|
||||
CPU variant : 0x0
|
||||
CPU part : 0xd07
|
||||
CPU revision : 0
|
||||
|
||||
Hardware : Juno
|
||||
Loading…
x
Reference in New Issue
Block a user