libyuv/include/libyuv/cpu_id.h
Frank Barchard 331c361581 AVX-VNNI detect
- Add kCpuHasAVXVNNI flag
- Remove deprecated GFNI detect to make space.

Meteor Lake has AVX-VNNI but not AVX512
~/intelsde/sde -mtl -- blaze-bin/third_party/libyuv/libyuv_test --gunit_filter=*CpuHas
doyuv3

Note: Google Test filter = *CpuHas
[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from LibYUVBaseTest
[ RUN      ] LibYUVBaseTest.TestCpuHas
Cpu Flags 0x203ff1
Has X86 0x10
Has SSE2 0x20
Has SSSE3 0x40
Has SSE41 0x80
Has SSE42 0x100
Has AVX 0x200
Has AVX2 0x400
Has ERMS 0x800
Has FMA3 0x1000
Has F16C 0x2000
Has AVX512BW 0x0
Has AVX512VL 0x0
Has AVX512VNNI 0x0
Has AVX512VBMI 0x0
Has AVX512VBMI2 0x0
Has AVX512VBITALG 0x0
Has AVX512VPOPCNTDQ 0x0
HAS AVXVNNI 0x200000
Has AVXVNNIINT8 0x0


AVX-VNNI detect

- Add kCpuHasAVXVNNI flag
- Remove deprecated GFNI detect to make space.

https://bugs.chromium.org/p/libyuv/issues/detail?id=967

Meteor Lake has AVX-VNNI but not AVX512
~/intelsde/sde -mtl -- blaze-bin/third_party/libyuv/libyuv_test --gunit_filter=*CpuHas
doyuv3
Note: Google Test filter = *CpuHas
[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from LibYUVBaseTest
[ RUN      ] LibYUVBaseTest.TestCpuHas
Cpu Flags 0x203ff1
Has X86 0x10
Has SSE2 0x20
Has SSSE3 0x40
Has SSE41 0x80
Has SSE42 0x100
Has AVX 0x200
Has AVX2 0x400
Has ERMS 0x800
Has FMA3 0x1000
Has F16C 0x2000
Has AVX512BW 0x0
Has AVX512VL 0x0
Has AVX512VNNI 0x0
Has AVX512VBMI 0x0
Has AVX512VBMI2 0x0
Has AVX512VBITALG 0x0
Has AVX512VPOPCNTDQ 0x0
HAS AVXVNNI 0x200000
Has AVXVNNIINT8 0x0

Running on all CPUs, the following report AVX-VNNI:
grep 'AVXVNNI 0x2' */*
adl/libyuv64.txt:HAS AVXVNNI 0x200000
gnr/libyuv64.txt:HAS AVXVNNI 0x200000
grr/libyuv64.txt:HAS AVXVNNI 0x200000
mtl/libyuv64.txt:HAS AVXVNNI 0x200000
rpl/libyuv64.txt:HAS AVXVNNI 0x200000
spr/libyuv64.txt:HAS AVXVNNI 0x200000
srf/libyuv64.txt:HAS AVXVNNI 0x200000

while these support AVX512-VNNI:
grep 'VNNI 0x1' */*
clx/libyuv64.txt:Has AVX512VNNI 0x10000
cpx/libyuv64.txt:Has AVX512VNNI 0x10000
gnr/libyuv64.txt:Has AVX512VNNI 0x10000
icl/libyuv64.txt:Has AVX512VNNI 0x10000
icx/libyuv64.txt:Has AVX512VNNI 0x10000
spr/libyuv64.txt:Has AVX512VNNI 0x10000
tgl/libyuv64.txt:Has AVX512VNNI 0x10000

and these support AVX-VNNI-INT8:
grep AVXVNNIINT8.0x4 */*
grr/libyuv64.txt:Has AVXVNNIINT8 0x400000
srf/libyuv64.txt:Has AVXVNNIINT8 0x400000

Bug: libyuv:967
Change-Id: I84cd71d1b320e7c284173eb695fc1d3b72d14ddb
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4912017
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
2023-10-05 21:24:09 +00:00

136 lines
4.7 KiB
C++

/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// CPU feature flags. Each constant occupies one distinct bit so that flags can
// be combined with | and tested with &. The hex value of every flag is given
// in the trailing comment.

// Internal flag marking the flags as initialized. TestCpuFlag treats a zero
// value as "not yet initialized", so a valid flag set includes this bit.
static const int kCpuInitialized = 1 << 0;  // 0x1

// These flags are only valid on ARM processors.
static const int kCpuHasARM = 1 << 1;   // 0x2
static const int kCpuHasNEON = 1 << 2;  // 0x4
// Bit 3 (0x8) is reserved for a future ARM flag.

// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 1 << 4;                // 0x10
static const int kCpuHasSSE2 = 1 << 5;               // 0x20
static const int kCpuHasSSSE3 = 1 << 6;              // 0x40
static const int kCpuHasSSE41 = 1 << 7;              // 0x80
static const int kCpuHasSSE42 = 1 << 8;              // 0x100, unused at this time.
static const int kCpuHasAVX = 1 << 9;                // 0x200
static const int kCpuHasAVX2 = 1 << 10;              // 0x400
static const int kCpuHasERMS = 1 << 11;              // 0x800
static const int kCpuHasFMA3 = 1 << 12;              // 0x1000
static const int kCpuHasF16C = 1 << 13;              // 0x2000
static const int kCpuHasAVX512BW = 1 << 14;          // 0x4000
static const int kCpuHasAVX512VL = 1 << 15;          // 0x8000
static const int kCpuHasAVX512VNNI = 1 << 16;        // 0x10000
static const int kCpuHasAVX512VBMI = 1 << 17;        // 0x20000
static const int kCpuHasAVX512VBMI2 = 1 << 18;       // 0x40000
static const int kCpuHasAVX512VBITALG = 1 << 19;     // 0x80000
static const int kCpuHasAVX512VPOPCNTDQ = 1 << 20;   // 0x100000
static const int kCpuHasAVXVNNI = 1 << 21;           // 0x200000
static const int kCpuHasAVXVNNIINT8 = 1 << 22;       // 0x400000

// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 1 << 23;  // 0x800000
static const int kCpuHasMSA = 1 << 24;   // 0x1000000

// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 1 << 25;  // 0x2000000
static const int kCpuHasLSX = 1 << 26;        // 0x4000000
static const int kCpuHasLASX = 1 << 27;       // 0x8000000

// These flags are only valid on RISCV processors.
static const int kCpuHasRISCV = 1 << 28;    // 0x10000000
static const int kCpuHasRVV = 1 << 29;      // 0x20000000
static const int kCpuHasRVVZVFH = 1 << 30;  // 0x40000000
// Optional init function that runs the CPU detection code.
// Calling it explicitly is not required: TestCpuFlag calls it automatically
// when the cached flags are still zero.
// Returns cpu_info flags (a combination of the kCpuHas constants above).
LIBYUV_API
int InitCpuFlags(void);
// Detect CPU has SSE2 etc.
// Test_flag parameter should be one of kCpuHas constants above.
// Returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
#ifdef __ATOMIC_RELAXED
int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
#else
int cpu_info = cpu_info_;
#endif
return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
}
// Internal functions for parsing /proc/cpuinfo, one per architecture
// (ARM, MIPS, RISCV).
// NOTE(review): cpuinfo_name is presumably the path of the cpuinfo file to
// read, and the return value presumably a combination of the kCpuHas flags
// above -- confirm against the implementation.
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int RiscvCpuCaps(const char* cpuinfo_name);
// For testing, allow CPU flags to be disabled.
// enable_flags selects which of the kCpuHas flags may remain enabled:
//   MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
//   MaskCpuFlags(-1) to enable all cpu specific optimizations.
//   MaskCpuFlags(1) to disable all cpu specific optimizations
//     (1 is kCpuInitialized, so the mask contains no kCpuHas feature bit).
//   MaskCpuFlags(0) to reset state so the next TestCpuFlag call will auto
//     init.
// Returns cpu_info flags.
LIBYUV_API
int MaskCpuFlags(int enable_flags);
// Stores |cpu_flags| as the CPU flags, bypassing the detection code.
//
// |cpu_flags| should be a valid combination of the kCpuHas constants above and
// include kCpuInitialized.
//
// Intended for sandboxed processes where the detection code might fail (as it
// might access /proc/cpuinfo). In such cases a non sandboxed process can call
// InitCpuFlags() and pass the resulting cpu_info to the sandboxed process
// (via command line parameters, IPC...), which then calls this function to
// initialize the CPU flags.
//
// Notes:
// - passing 0 for |cpu_flags| enables the auto initialization again.
// - enabling CPU features that are not supported by the CPU will result in
//   undefined behavior.
// - the value is written with a relaxed atomic store where the compiler
//   provides one, otherwise with a plain assignment.
//
// TODO(fbarchard): consider writing a helper function that translates from
// other library CPU info to libyuv CPU info and add a .md doc that explains
// CPU detection.
static __inline void SetCpuFlags(int cpu_flags) {
  LIBYUV_API extern int cpu_info_;
#if !defined(__ATOMIC_RELAXED)
  cpu_info_ = cpu_flags;
#else
  __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
#endif
}
// Low level cpuid for X86. Returns zeros on other CPUs.
// info_eax is the info type that you want.
// info_ecx is typically the cpu number, and should normally be zero.
// cpu_info receives the result.
// NOTE(review): cpu_info presumably must point to an array of 4 ints (eax,
// ebx, ecx, edx) -- confirm against the implementation.
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int* cpu_info);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_