diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index af8e0f5e0..a352f22f6 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -92,6 +92,7 @@ int MipsCpuCaps(const char* cpuinfo_name); LIBYUV_API int RiscvCpuCaps(const char* cpuinfo_name); +#ifdef __aarch64__ #if __linux__ // On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}). LIBYUV_API @@ -100,6 +101,7 @@ int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2); LIBYUV_API int AArch64CpuCaps(); #endif +#endif // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 5d192fbe5..6b6e8745f 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -23,10 +23,22 @@ #include <stdio.h> // For fopen() #include <string.h> -#ifdef __linux__ +#if defined(__linux__) && defined(__aarch64__) #include <sys/auxv.h> // For getauxval() #endif +#if defined(_WIN32) && defined(__aarch64__) +#undef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#undef WIN32_EXTRA_LEAN +#define WIN32_EXTRA_LEAN +#include <windows.h> // For IsProcessorFeaturePresent() +#endif + +#if defined(__APPLE__) && defined(__aarch64__) +#include <sys/sysctl.h> // For sysctlbyname() +#endif + #ifdef __cplusplus namespace libyuv { extern "C" { @@ -166,6 +178,7 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { return features; } +#ifdef __aarch64__ #ifdef __linux__ // Define hwcap values ourselves: building with an old auxv header where these // hwcap values are not defined should not prevent features from being enabled. @@ -194,17 +207,63 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, } return features; } -#else // !defined(__linux__) + +#elif defined(_WIN32) // For AArch64, but public to allow testing on any CPU. LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { // Neon is mandatory on AArch64, so enable unconditionally. int features = kCpuHasNEON; - // TODO(libyuv:980) support feature detection on non-Linux platforms. 
+ // For more information on IsProcessorFeaturePresent(), see: + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters +#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + features |= kCpuHasNeonDotProd; + } +#endif + // No Neon I8MM or SVE feature detection available here at time of writing. + return features; +} + +#elif defined(__APPLE__) +static bool have_feature(const char* feature) { + // For more information on sysctlbyname(), see: + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + int64_t feature_present = 0; + size_t size = sizeof(feature_present); + if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) { + return false; + } + return feature_present; +} + +// For AArch64, but public to allow testing on any CPU. +LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { + // Neon is mandatory on AArch64, so enable unconditionally. + int features = kCpuHasNEON; + + if (have_feature("hw.optional.arm.FEAT_DotProd")) { + features |= kCpuHasNeonDotProd; + } + if (have_feature("hw.optional.arm.FEAT_I8MM")) { + features |= kCpuHasNeonI8MM; + } + // No SVE feature detection available here at time of writing. + return features; +} + +#else // !defined(__linux__) && !defined(_WIN32) && !defined(__APPLE__) +// For AArch64, but public to allow testing on any CPU. +LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() { + // Neon is mandatory on AArch64, so enable unconditionally. + int features = kCpuHasNEON; + + // TODO(libyuv:980) support feature detection on other platforms. 
return features; } #endif +#endif // defined(__aarch64__) LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) { char cpuinfo_line[512]; diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 38406c9a7..928ef5fbb 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -273,7 +273,7 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) { #endif } -#ifdef __linux__ +#if defined(__linux__) && defined(__aarch64__) TEST_F(LibYUVBaseTest, TestLinuxAArch64) { // Values taken from a Cortex-A57 machine, only Neon available. EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U));