[AArch64] Enable feature detection on Windows and Apple Silicon

Use the platform-specific functions IsProcessorFeaturePresent (Windows) and
sysctlbyname (Apple) to check individual features.

Bug: libyuv:980
Change-Id: I7971238ca72e5df862c30c2e65331c46dc634074
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5465591
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
George Steed 2024-04-19 11:10:24 +01:00 committed by libyuv LUCI CQ
parent a114f85e50
commit ee830a5f77
3 changed files with 65 additions and 4 deletions

View File

@ -92,6 +92,7 @@ int MipsCpuCaps(const char* cpuinfo_name);
LIBYUV_API LIBYUV_API
int RiscvCpuCaps(const char* cpuinfo_name); int RiscvCpuCaps(const char* cpuinfo_name);
#ifdef __aarch64__
#if __linux__ #if __linux__
// On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}). // On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}).
LIBYUV_API LIBYUV_API
@ -100,6 +101,7 @@ int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2);
LIBYUV_API LIBYUV_API
int AArch64CpuCaps(); int AArch64CpuCaps();
#endif #endif
#endif
// For testing, allow CPU flags to be disabled. // For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.

View File

@ -23,10 +23,22 @@
#include <stdio.h> // For fopen() #include <stdio.h> // For fopen()
#include <string.h> #include <string.h>
#ifdef __linux__ #if defined(__linux__) && defined(__aarch64__)
#include <sys/auxv.h> // For getauxval() #include <sys/auxv.h> // For getauxval()
#endif #endif
#if defined(_WIN32) && defined(__aarch64__)
#undef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#undef WIN32_EXTRA_LEAN
#define WIN32_EXTRA_LEAN
#include <windows.h> // For IsProcessorFeaturePresent()
#endif
#if defined(__APPLE__) && defined(__aarch64__)
#include <sys/sysctl.h> // For sysctlbyname()
#endif
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
@ -166,6 +178,7 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
return features; return features;
} }
#ifdef __aarch64__
#ifdef __linux__ #ifdef __linux__
// Define hwcap values ourselves: building with an old auxv header where these // Define hwcap values ourselves: building with an old auxv header where these
// hwcap values are not defined should not prevent features from being enabled. // hwcap values are not defined should not prevent features from being enabled.
@ -194,17 +207,63 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
} }
return features; return features;
} }
#else // !defined(__linux__)
#elif defined(_WIN32)
// For AArch64, but public to allow testing on any CPU. // For AArch64, but public to allow testing on any CPU.
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
// Neon is mandatory on AArch64, so enable unconditionally. // Neon is mandatory on AArch64, so enable unconditionally.
int features = kCpuHasNEON; int features = kCpuHasNEON;
// TODO(libyuv:980) support feature detection on non-Linux platforms. // For more information on IsProcessorFeaturePresent(), see:
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
features |= kCpuHasNeonDotProd;
}
#endif
// No Neon I8MM or SVE feature detection available here at time of writing.
return features;
}
#elif defined(__APPLE__)
// Reports whether the sysctl entry named by |feature| reads back as non-zero.
// A failed lookup is treated the same as the feature being absent.
static bool have_feature(const char* feature) {
  // For more information on sysctlbyname(), see:
  // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
  int64_t value = 0;  // Zero-initialised before the query writes into it.
  size_t value_size = sizeof(value);
  const int status = sysctlbyname(feature, &value, &value_size, NULL, 0);
  return status == 0 && value != 0;
}
// For AArch64, but public to allow testing on any CPU.
// Builds the CPU feature mask by asking have_feature() about each optional
// extension in turn.
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
  // Neon is mandatory on AArch64, so it is always part of the mask.
  int cpu_flags = kCpuHasNEON;
  cpu_flags |=
      have_feature("hw.optional.arm.FEAT_DotProd") ? kCpuHasNeonDotProd : 0;
  cpu_flags |= have_feature("hw.optional.arm.FEAT_I8MM") ? kCpuHasNeonI8MM : 0;
  // No SVE feature detection available here at time of writing.
  return cpu_flags;
}
#else // !defined(__linux__) && !defined(_WIN32) && !defined(__APPLE__)
// For AArch64, but public to allow testing on any CPU.
LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
// Neon is mandatory on AArch64, so enable unconditionally.
int features = kCpuHasNEON;
// TODO(libyuv:980) support feature detection on other platforms.
return features; return features;
} }
#endif #endif
#endif // defined(__aarch64__)
LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) { LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512]; char cpuinfo_line[512];

View File

@ -273,7 +273,7 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) {
#endif #endif
} }
#ifdef __linux__ #if defined(__linux__) && defined(__aarch64__)
TEST_F(LibYUVBaseTest, TestLinuxAArch64) { TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
// Values taken from a Cortex-A57 machine, only Neon available. // Values taken from a Cortex-A57 machine, only Neon available.
EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U)); EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U));