mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
CpuId test FSMR - Fast Short Rep Movsb
- Renumber cpuid bits to use the low byte to ID the type of CPU and the upper 24 bits for features.

Intel CPUs starting at Icelake support FSMR:
adl:Has FSMR 0x8000 arl:Has FSMR 0x0 bdw:Has FSMR 0x0 clx:Has FSMR 0x0 cnl:Has FSMR 0x0 cpx:Has FSMR 0x0 emr:Has FSMR 0x8000 glm:Has FSMR 0x0 glp:Has FSMR 0x0 gnr:Has FSMR 0x8000 gnr256:Has FSMR 0x8000 hsw:Has FSMR 0x0 icl:Has FSMR 0x8000 icx:Has FSMR 0x8000 ivb:Has FSMR 0x0 knl:Has FSMR 0x0 knm:Has FSMR 0x0 lnl:Has FSMR 0x8000 mrm:Has FSMR 0x0 mtl:Has FSMR 0x8000 nhm:Has FSMR 0x0 pnr:Has FSMR 0x0 rpl:Has FSMR 0x8000 skl:Has FSMR 0x0 skx:Has FSMR 0x0 slm:Has FSMR 0x0 slt:Has FSMR 0x0 snb:Has FSMR 0x0 snr:Has FSMR 0x0 spr:Has FSMR 0x8000 srf:Has FSMR 0x0 tgl:Has FSMR 0x8000 tnt:Has FSMR 0x0 wsm:Has FSMR 0x0

Intel CPUs starting at Ivybridge support ERMS:
adl:Has ERMS 0x4000 arl:Has ERMS 0x4000 bdw:Has ERMS 0x4000 clx:Has ERMS 0x4000 cnl:Has ERMS 0x4000 cpx:Has ERMS 0x4000 emr:Has ERMS 0x4000 glm:Has ERMS 0x4000 glp:Has ERMS 0x4000 gnr:Has ERMS 0x4000 gnr256:Has ERMS 0x4000 hsw:Has ERMS 0x4000 icl:Has ERMS 0x4000 icx:Has ERMS 0x4000 ivb:Has ERMS 0x4000 knl:Has ERMS 0x4000 knm:Has ERMS 0x4000 lnl:Has ERMS 0x4000 mrm:Has ERMS 0x0 mtl:Has ERMS 0x4000 nhm:Has ERMS 0x0 pnr:Has ERMS 0x0 rpl:Has ERMS 0x4000 skl:Has ERMS 0x4000 skx:Has ERMS 0x4000 slm:Has ERMS 0x4000 slt:Has ERMS 0x0 snb:Has ERMS 0x0 snr:Has ERMS 0x4000 spr:Has ERMS 0x4000 srf:Has ERMS 0x4000 tgl:Has ERMS 0x4000 tnt:Has ERMS 0x4000 wsm:Has ERMS 0x0

Change-Id: I18e5a3905f2691ab66d4d0cb6f668c0a0ff72d37
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6027541
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
75f7cfdde5
commit
1c501a8f3f
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||||
Version: 1897
|
Version: 1898
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
Shipped: yes
|
Shipped: yes
|
||||||
|
|||||||
@ -23,49 +23,50 @@ static const int kCpuInitialized = 0x1;
|
|||||||
|
|
||||||
// These flags are only valid on Arm processors.
|
// These flags are only valid on Arm processors.
|
||||||
static const int kCpuHasARM = 0x2;
|
static const int kCpuHasARM = 0x2;
|
||||||
static const int kCpuHasNEON = 0x4;
|
static const int kCpuHasNEON = 0x100;
|
||||||
// Leave a gap to avoid setting kCpuHasX86.
|
static const int kCpuHasNeonDotProd = 0x200;
|
||||||
static const int kCpuHasNeonDotProd = 0x10;
|
static const int kCpuHasNeonI8MM = 0x400;
|
||||||
static const int kCpuHasNeonI8MM = 0x20;
|
static const int kCpuHasSVE = 0x800;
|
||||||
static const int kCpuHasSVE = 0x40;
|
static const int kCpuHasSVE2 = 0x1000;
|
||||||
static const int kCpuHasSVE2 = 0x80;
|
static const int kCpuHasSME = 0x2000;
|
||||||
static const int kCpuHasSME = 0x100;
|
|
||||||
|
|
||||||
// These flags are only valid on x86 processors.
|
|
||||||
static const int kCpuHasX86 = 0x8;
|
|
||||||
static const int kCpuHasSSE2 = 0x10;
|
|
||||||
static const int kCpuHasSSSE3 = 0x20;
|
|
||||||
static const int kCpuHasSSE41 = 0x40;
|
|
||||||
static const int kCpuHasSSE42 = 0x80;
|
|
||||||
static const int kCpuHasAVX = 0x100;
|
|
||||||
static const int kCpuHasAVX2 = 0x200;
|
|
||||||
static const int kCpuHasERMS = 0x400;
|
|
||||||
static const int kCpuHasFMA3 = 0x800;
|
|
||||||
static const int kCpuHasF16C = 0x1000;
|
|
||||||
static const int kCpuHasAVX512BW = 0x2000;
|
|
||||||
static const int kCpuHasAVX512VL = 0x4000;
|
|
||||||
static const int kCpuHasAVX512VNNI = 0x8000;
|
|
||||||
static const int kCpuHasAVX512VBMI = 0x10000;
|
|
||||||
static const int kCpuHasAVX512VBMI2 = 0x20000;
|
|
||||||
static const int kCpuHasAVX512VBITALG = 0x40000;
|
|
||||||
static const int kCpuHasAVX10 = 0x80000;
|
|
||||||
static const int kCpuHasAVXVNNI = 0x100000;
|
|
||||||
static const int kCpuHasAVXVNNIINT8 = 0x200000;
|
|
||||||
static const int kCpuHasAMXINT8 = 0x400000;
|
|
||||||
|
|
||||||
// These flags are only valid on MIPS processors.
|
|
||||||
static const int kCpuHasMIPS = 0x800000;
|
|
||||||
static const int kCpuHasMSA = 0x1000000;
|
|
||||||
|
|
||||||
// These flags are only valid on LOONGARCH processors.
|
|
||||||
static const int kCpuHasLOONGARCH = 0x2000000;
|
|
||||||
static const int kCpuHasLSX = 0x4000000;
|
|
||||||
static const int kCpuHasLASX = 0x8000000;
|
|
||||||
|
|
||||||
// These flags are only valid on RISCV processors.
|
// These flags are only valid on RISCV processors.
|
||||||
static const int kCpuHasRISCV = 0x10000000;
|
static const int kCpuHasRISCV = 0x8;
|
||||||
static const int kCpuHasRVV = 0x20000000;
|
static const int kCpuHasRVV = 0x100;
|
||||||
static const int kCpuHasRVVZVFH = 0x40000000;
|
static const int kCpuHasRVVZVFH = 0x200;
|
||||||
|
|
||||||
|
// These flags are only valid on x86 processors.
|
||||||
|
static const int kCpuHasX86 = 0x10;
|
||||||
|
static const int kCpuHasSSE2 = 0x100;
|
||||||
|
static const int kCpuHasSSSE3 = 0x200;
|
||||||
|
static const int kCpuHasSSE41 = 0x400;
|
||||||
|
static const int kCpuHasSSE42 = 0x800;
|
||||||
|
static const int kCpuHasAVX = 0x1000;
|
||||||
|
static const int kCpuHasAVX2 = 0x2000;
|
||||||
|
static const int kCpuHasERMS = 0x4000;
|
||||||
|
static const int kCpuHasFSMR = 0x8000;
|
||||||
|
static const int kCpuHasFMA3 = 0x10000;
|
||||||
|
static const int kCpuHasF16C = 0x20000;
|
||||||
|
static const int kCpuHasAVX512BW = 0x40000;
|
||||||
|
static const int kCpuHasAVX512VL = 0x80000;
|
||||||
|
static const int kCpuHasAVX512VNNI = 0x100000;
|
||||||
|
static const int kCpuHasAVX512VBMI = 0x200000;
|
||||||
|
static const int kCpuHasAVX512VBMI2 = 0x400000;
|
||||||
|
static const int kCpuHasAVX512VBITALG = 0x800000;
|
||||||
|
static const int kCpuHasAVX10 = 0x1000000;
|
||||||
|
static const int kCpuHasAVXVNNI = 0x2000000;
|
||||||
|
static const int kCpuHasAVXVNNIINT8 = 0x4000000;
|
||||||
|
static const int kCpuHasAMXINT8 = 0x8000000;
|
||||||
|
|
||||||
|
// These flags are only valid on MIPS processors.
|
||||||
|
static const int kCpuHasMIPS = 0x20;
|
||||||
|
static const int kCpuHasMSA = 0x100;
|
||||||
|
|
||||||
|
// These flags are only valid on LOONGARCH processors.
|
||||||
|
static const int kCpuHasLOONGARCH = 0x40;
|
||||||
|
static const int kCpuHasLSX = 0x100;
|
||||||
|
static const int kCpuHasLASX = 0x200;
|
||||||
|
|
||||||
|
|
||||||
// Optional init function. TestCpuFlag does an auto-init.
|
// Optional init function. TestCpuFlag does an auto-init.
|
||||||
// Returns cpu_info flags.
|
// Returns cpu_info flags.
|
||||||
|
|||||||
@ -355,6 +355,7 @@ extern "C" {
|
|||||||
// TODO(b/42280744): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI.
|
// TODO(b/42280744): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI.
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||||
(defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512)
|
(defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512)
|
||||||
|
#define HAS_COPYROW_AVX512BW
|
||||||
#define HAS_ARGBTORGB24ROW_AVX512VBMI
|
#define HAS_ARGBTORGB24ROW_AVX512VBMI
|
||||||
#define HAS_CONVERT16TO8ROW_AVX512BW
|
#define HAS_CONVERT16TO8ROW_AVX512BW
|
||||||
#define HAS_MERGEUVROW_AVX512BW
|
#define HAS_MERGEUVROW_AVX512BW
|
||||||
@ -3406,6 +3407,7 @@ void Convert16To8Row_Any_NEON(const uint16_t* src_ptr,
|
|||||||
|
|
||||||
void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
|
void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
|
||||||
|
void CopyRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
|
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
|
void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
|
void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
|
||||||
@ -3413,6 +3415,7 @@ void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int count);
|
|||||||
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
|
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
|
||||||
void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
|
void CopyRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
|
|
||||||
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
|
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1897
|
#define LIBYUV_VERSION 1898
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -419,7 +419,8 @@ static SAFEBUFFERS int GetCpuFlags(void) {
|
|||||||
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
||||||
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
|
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
|
||||||
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
|
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
|
||||||
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
|
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
|
||||||
|
((cpu_info7[3] & 0x00000010) ? kCpuHasFSMR : 0);
|
||||||
|
|
||||||
// AVX requires OS saves YMM registers.
|
// AVX requires OS saves YMM registers.
|
||||||
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
|
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
|
||||||
@ -432,14 +433,14 @@ static SAFEBUFFERS int GetCpuFlags(void) {
|
|||||||
|
|
||||||
// Detect AVX512bw
|
// Detect AVX512bw
|
||||||
if ((GetXCR0() & 0xe0) == 0xe0) {
|
if ((GetXCR0() & 0xe0) == 0xe0) {
|
||||||
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
|
cpu_info |= ((cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0) |
|
||||||
cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
|
((cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0) |
|
||||||
cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
|
((cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0) |
|
||||||
cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
|
((cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0) |
|
||||||
cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
|
((cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0) |
|
||||||
cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
|
((cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0) |
|
||||||
cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0;
|
((cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0) |
|
||||||
cpu_info |= (cpu_info7[3] & 0x02000000) ? kCpuHasAMXINT8 : 0;
|
((cpu_info7[3] & 0x02000000) ? kCpuHasAMXINT8 : 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -62,6 +62,11 @@ void CopyPlane(const uint8_t* src_y,
|
|||||||
CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
|
CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_COPYROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
CopyRow = IS_ALIGNED(width, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_COPYROW_ERMS)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
if (TestCpuFlag(kCpuHasERMS)) {
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
CopyRow = CopyRow_ERMS;
|
CopyRow = CopyRow_ERMS;
|
||||||
|
|||||||
@ -234,6 +234,11 @@ void RotatePlane180(const uint8_t* src,
|
|||||||
CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
|
CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_COPYROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
CopyRow = IS_ALIGNED(width, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_COPYROW_ERMS)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
if (TestCpuFlag(kCpuHasERMS)) {
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
CopyRow = CopyRow_ERMS;
|
CopyRow = CopyRow_ERMS;
|
||||||
|
|||||||
@ -189,6 +189,11 @@ static int ARGBRotate180(const uint8_t* src_argb,
|
|||||||
CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
|
CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_COPYROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
CopyRow = IS_ALIGNED(width * 4, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_COPYROW_ERMS)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
if (TestCpuFlag(kCpuHasERMS)) {
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
CopyRow = CopyRow_ERMS;
|
CopyRow = CopyRow_ERMS;
|
||||||
|
|||||||
@ -967,6 +967,9 @@ ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
|
|||||||
memcpy(dst_ptr + n * BPP, vout, r * BPP); \
|
memcpy(dst_ptr + n * BPP, vout, r * BPP); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAS_COPYROW_AVX512BW
|
||||||
|
ANY11(CopyRow_Any_AVX512BW, CopyRow_AVX512BW, 0, 1, 1, 127)
|
||||||
|
#endif
|
||||||
#ifdef HAS_COPYROW_AVX
|
#ifdef HAS_COPYROW_AVX
|
||||||
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
|
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -3361,7 +3361,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpunpcklbw %%zmm1,%%zmm3,%%zmm3 \n" \
|
"vpunpcklbw %%zmm1,%%zmm3,%%zmm3 \n" \
|
||||||
"vpermq $0xd8,%%zmm3,%%zmm3 \n" \
|
"vpermq $0xd8,%%zmm3,%%zmm3 \n" \
|
||||||
"vpunpcklwd %%zmm3,%%zmm3,%%zmm3 \n" \
|
"vpunpcklwd %%zmm3,%%zmm3,%%zmm3 \n" \
|
||||||
"vmovdqu8 (%[y_buf]),%%ymm4 \n" \
|
"vmovups (%[y_buf]),%%ymm4 \n" \
|
||||||
"vpermq %%zmm4,%%zmm17,%%zmm4 \n" \
|
"vpermq %%zmm4,%%zmm17,%%zmm4 \n" \
|
||||||
"vpermq $0xd8,%%zmm4,%%zmm4 \n" \
|
"vpermq $0xd8,%%zmm4,%%zmm4 \n" \
|
||||||
"vpunpcklbw %%zmm4,%%zmm4,%%zmm4 \n" \
|
"vpunpcklbw %%zmm4,%%zmm4,%%zmm4 \n" \
|
||||||
@ -3580,9 +3580,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpbroadcastq %%xmm11,%%zmm11 \n" \
|
"vpbroadcastq %%xmm11,%%zmm11 \n" \
|
||||||
"movq 128(%[yuvconstants]),%%xmm12 \n" \
|
"movq 128(%[yuvconstants]),%%xmm12 \n" \
|
||||||
"vpbroadcastq %%xmm12,%%zmm12 \n" \
|
"vpbroadcastq %%xmm12,%%zmm12 \n" \
|
||||||
"vmovdqu8 (%[quadsplitperm]),%%zmm16 \n" \
|
"vmovups (%[quadsplitperm]),%%zmm16 \n" \
|
||||||
"vmovdqu8 (%[dquadsplitperm]),%%zmm17 \n" \
|
"vmovups (%[dquadsplitperm]),%%zmm17 \n" \
|
||||||
"vmovdqu8 (%[unperm]),%%zmm18 \n"
|
"vmovups (%[unperm]),%%zmm18 \n"
|
||||||
|
|
||||||
#define YUVTORGB16_AVX2(yuvconstants) \
|
#define YUVTORGB16_AVX2(yuvconstants) \
|
||||||
"vpsubb %%ymm13,%%ymm3,%%ymm3 \n" \
|
"vpsubb %%ymm13,%%ymm3,%%ymm3 \n" \
|
||||||
@ -3672,8 +3672,8 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpermq %%zmm2,%%zmm18,%%zmm2 \n" \
|
"vpermq %%zmm2,%%zmm18,%%zmm2 \n" \
|
||||||
"vpunpcklwd %%zmm2,%%zmm0,%%zmm1 \n" \
|
"vpunpcklwd %%zmm2,%%zmm0,%%zmm1 \n" \
|
||||||
"vpunpckhwd %%zmm2,%%zmm0,%%zmm0 \n" \
|
"vpunpckhwd %%zmm2,%%zmm0,%%zmm0 \n" \
|
||||||
"vmovdqu8 %%zmm1,(%[dst_argb]) \n" \
|
"vmovups %%zmm1,(%[dst_argb]) \n" \
|
||||||
"vmovdqu8 %%zmm0,0x40(%[dst_argb]) \n" \
|
"vmovups %%zmm0,0x40(%[dst_argb]) \n" \
|
||||||
"lea 0x80(%[dst_argb]), %[dst_argb] \n"
|
"lea 0x80(%[dst_argb]), %[dst_argb] \n"
|
||||||
|
|
||||||
// Store 16 AR30 values.
|
// Store 16 AR30 values.
|
||||||
@ -5340,15 +5340,15 @@ void Convert16To8Row_AVX512BW(const uint16_t* src_y,
|
|||||||
// 64 pixels per loop.
|
// 64 pixels per loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu8 (%0),%%zmm0 \n"
|
"vmovups (%0),%%zmm0 \n"
|
||||||
"vmovdqu8 0x40(%0),%%zmm1 \n"
|
"vmovups 0x40(%0),%%zmm1 \n"
|
||||||
"add $0x80,%0 \n"
|
"add $0x80,%0 \n"
|
||||||
"vpmulhuw %%zmm2,%%zmm0,%%zmm0 \n"
|
"vpmulhuw %%zmm2,%%zmm0,%%zmm0 \n"
|
||||||
"vpmulhuw %%zmm2,%%zmm1,%%zmm1 \n"
|
"vpmulhuw %%zmm2,%%zmm1,%%zmm1 \n"
|
||||||
"vpmovuswb %%zmm0,%%ymm0 \n"
|
"vpmovuswb %%zmm0,%%ymm0 \n"
|
||||||
"vpmovuswb %%zmm1,%%ymm1 \n"
|
"vpmovuswb %%zmm1,%%ymm1 \n"
|
||||||
"vmovdqu8 %%ymm0,(%1) \n"
|
"vmovups %%ymm0,(%1) \n"
|
||||||
"vmovdqu8 %%ymm1,0x20(%1) \n"
|
"vmovups %%ymm1,0x20(%1) \n"
|
||||||
"add $0x40,%1 \n"
|
"add $0x40,%1 \n"
|
||||||
"sub $0x40,%2 \n"
|
"sub $0x40,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
@ -5504,17 +5504,20 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
|
|||||||
#endif // HAS_SPLITRGBROW_SSSE3
|
#endif // HAS_SPLITRGBROW_SSSE3
|
||||||
|
|
||||||
#ifdef HAS_SPLITRGBROW_SSE41
|
#ifdef HAS_SPLITRGBROW_SSE41
|
||||||
// Shuffle table for converting RGB to Planar, SSE4.1.
|
// Shuffle table for converting RGB to Planar, SSE4.1. Note: these are used for
|
||||||
alignas(16) static const uvec8 kSplitRGBShuffleSSE41[4] = {
|
// the AVX2 implementation as well.
|
||||||
{1u, 128u, 0u, 1u, 128u, 0u, 1u, 128u, 0u, 1u, 128u, 0u, 1u, 128u, 0u, 1u},
|
static const uvec8 kSplitRGBShuffleSSE41[5] = {
|
||||||
{0u, 3u, 6u, 9u, 12u, 15u, 2u, 5u, 8u, 11u, 14u, 1u, 4u, 7u, 10u, 13u},
|
{0u, 3u, 6u, 9u, 12u, 15u, 2u, 5u, 8u, 11u, 14u, 1u, 4u, 7u, 10u, 13u},
|
||||||
{1u, 4u, 7u, 10u, 13u, 0u, 3u, 6u, 9u, 12u, 15u, 2u, 5u, 8u, 11u, 14u},
|
{1u, 4u, 7u, 10u, 13u, 0u, 3u, 6u, 9u, 12u, 15u, 2u, 5u, 8u, 11u, 14u},
|
||||||
{2u, 5u, 8u, 11u, 14u, 1u, 4u, 7u, 10u, 13u, 0u, 3u, 6u, 9u, 12u, 15u}};
|
{2u, 5u, 8u, 11u, 14u, 1u, 4u, 7u, 10u, 13u, 0u, 3u, 6u, 9u, 12u, 15u},
|
||||||
|
{0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u},
|
||||||
|
{0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u},
|
||||||
|
};
|
||||||
|
|
||||||
void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
|
void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
|
||||||
uint8_t* dst_g, uint8_t* dst_b, int width) {
|
uint8_t* dst_g, uint8_t* dst_b, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movdqa 0(%5), %%xmm0 \n"
|
"movdqa 48(%5), %%xmm0 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm1 \n"
|
"movdqu (%0),%%xmm1 \n"
|
||||||
"movdqu 0x10(%0),%%xmm2 \n"
|
"movdqu 0x10(%0),%%xmm2 \n"
|
||||||
@ -5524,14 +5527,14 @@ void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
|
|||||||
"pblendvb %%xmm3, %%xmm1 \n"
|
"pblendvb %%xmm3, %%xmm1 \n"
|
||||||
"pblendvb %%xmm2, %%xmm3 \n"
|
"pblendvb %%xmm2, %%xmm3 \n"
|
||||||
"pblendvb %%xmm4, %%xmm2 \n"
|
"pblendvb %%xmm4, %%xmm2 \n"
|
||||||
"psrlq $0x1, %%xmm0 \n"
|
"palignr $0xF, %%xmm0, %%xmm0 \n"
|
||||||
"pblendvb %%xmm2, %%xmm1 \n"
|
"pblendvb %%xmm2, %%xmm1 \n"
|
||||||
"pblendvb %%xmm3, %%xmm2 \n"
|
"pblendvb %%xmm3, %%xmm2 \n"
|
||||||
"pblendvb %%xmm4, %%xmm3 \n"
|
"pblendvb %%xmm4, %%xmm3 \n"
|
||||||
"psllq $0x1, %%xmm0 \n"
|
"palignr $0x1, %%xmm0, %%xmm0 \n"
|
||||||
"pshufb 16(%5), %%xmm1 \n"
|
"pshufb 0(%5), %%xmm1 \n"
|
||||||
"pshufb 32(%5), %%xmm2 \n"
|
"pshufb 16(%5), %%xmm2 \n"
|
||||||
"pshufb 48(%5), %%xmm3 \n"
|
"pshufb 32(%5), %%xmm3 \n"
|
||||||
"movdqu %%xmm1,(%1) \n"
|
"movdqu %%xmm1,(%1) \n"
|
||||||
"lea 0x10(%1),%1 \n"
|
"lea 0x10(%1),%1 \n"
|
||||||
"movdqu %%xmm2,(%2) \n"
|
"movdqu %%xmm2,(%2) \n"
|
||||||
@ -5554,8 +5557,13 @@ void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
|
|||||||
void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
|
void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
|
||||||
uint8_t* dst_g, uint8_t* dst_b, int width) {
|
uint8_t* dst_g, uint8_t* dst_b, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vbroadcasti128 0(%5), %%ymm0 \n"
|
"vbroadcasti128 48(%5), %%ymm0 \n"
|
||||||
"vpsrlq $0x1,%%ymm0,%%ymm7 \n"
|
"vbroadcasti128 64(%5), %%ymm7 \n"
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
"vbroadcasti128 0(%5), %%ymm8 \n"
|
||||||
|
"vbroadcasti128 16(%5), %%ymm9 \n"
|
||||||
|
"vbroadcasti128 32(%5), %%ymm10 \n"
|
||||||
|
#endif
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu (%0),%%ymm4 \n"
|
"vmovdqu (%0),%%ymm4 \n"
|
||||||
"vmovdqu 0x20(%0),%%ymm5 \n"
|
"vmovdqu 0x20(%0),%%ymm5 \n"
|
||||||
@ -5570,12 +5578,18 @@ void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
|
|||||||
"vpblendvb %%ymm7, %%ymm5, %%ymm4, %%ymm1 \n"
|
"vpblendvb %%ymm7, %%ymm5, %%ymm4, %%ymm1 \n"
|
||||||
"vpblendvb %%ymm7, %%ymm6, %%ymm5, %%ymm2 \n"
|
"vpblendvb %%ymm7, %%ymm6, %%ymm5, %%ymm2 \n"
|
||||||
"vpblendvb %%ymm7, %%ymm4, %%ymm6, %%ymm3 \n"
|
"vpblendvb %%ymm7, %%ymm4, %%ymm6, %%ymm3 \n"
|
||||||
"vbroadcasti128 16(%5), %%ymm4 \n"
|
#if defined(__x86_64__)
|
||||||
"vbroadcasti128 32(%5), %%ymm5 \n"
|
"vpshufb %%ymm8, %%ymm1, %%ymm1 \n"
|
||||||
"vbroadcasti128 48(%5), %%ymm6 \n"
|
"vpshufb %%ymm9, %%ymm2, %%ymm2 \n"
|
||||||
|
"vpshufb %%ymm10, %%ymm3, %%ymm3 \n"
|
||||||
|
#else
|
||||||
|
"vbroadcasti128 0(%5), %%ymm4 \n"
|
||||||
|
"vbroadcasti128 16(%5), %%ymm5 \n"
|
||||||
|
"vbroadcasti128 32(%5), %%ymm6 \n"
|
||||||
"vpshufb %%ymm4, %%ymm1, %%ymm1 \n"
|
"vpshufb %%ymm4, %%ymm1, %%ymm1 \n"
|
||||||
"vpshufb %%ymm5, %%ymm2, %%ymm2 \n"
|
"vpshufb %%ymm5, %%ymm2, %%ymm2 \n"
|
||||||
"vpshufb %%ymm6, %%ymm3, %%ymm3 \n"
|
"vpshufb %%ymm6, %%ymm3, %%ymm3 \n"
|
||||||
|
#endif
|
||||||
"vmovdqu %%ymm1,(%1) \n"
|
"vmovdqu %%ymm1,(%1) \n"
|
||||||
"lea 0x20(%1),%1 \n"
|
"lea 0x20(%1),%1 \n"
|
||||||
"vmovdqu %%ymm2,(%2) \n"
|
"vmovdqu %%ymm2,(%2) \n"
|
||||||
@ -5591,7 +5605,11 @@ void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
|
|||||||
"+r"(width) // %4
|
"+r"(width) // %4
|
||||||
: "r"(&kSplitRGBShuffleSSE41[0]) // %5
|
: "r"(&kSplitRGBShuffleSSE41[0]) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm7");
|
"xmm7"
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
, "xmm8", "xmm9", "xmm10"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
}
|
}
|
||||||
#endif // HAS_SPLITRGBROW_AVX2
|
#endif // HAS_SPLITRGBROW_AVX2
|
||||||
|
|
||||||
@ -6487,6 +6505,27 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_COPYROW_AVX
|
#endif // HAS_COPYROW_AVX
|
||||||
|
|
||||||
|
#ifdef HAS_COPYROW_AVX512BW
|
||||||
|
void CopyRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
|
asm volatile (
|
||||||
|
"1: \n"
|
||||||
|
"vmovups (%0),%%zmm0 \n"
|
||||||
|
"vmovups 0x40(%0),%%zmm1 \n"
|
||||||
|
"lea 0x80(%0),%0 \n"
|
||||||
|
"vmovups %%zmm0,(%1) \n"
|
||||||
|
"vmovups %%zmm1,0x40(%1) \n"
|
||||||
|
"lea 0x80(%1),%1 \n"
|
||||||
|
"sub $0x80,%2 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src), // %0
|
||||||
|
"+r"(dst), // %1
|
||||||
|
"+r"(width) // %2
|
||||||
|
:
|
||||||
|
: "memory", "cc", "xmm0", "xmm1");
|
||||||
|
}
|
||||||
|
#endif  // HAS_COPYROW_AVX512BW
|
||||||
|
|
||||||
#ifdef HAS_COPYROW_ERMS
|
#ifdef HAS_COPYROW_ERMS
|
||||||
// Multiple of 1.
|
// Multiple of 1.
|
||||||
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
|
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
|
|||||||
@ -15,6 +15,7 @@ namespace libyuv {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \
|
#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \
|
||||||
defined(__aarch64__)
|
defined(__aarch64__)
|
||||||
|
|
||||||
|
|||||||
@ -24,6 +24,48 @@
|
|||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
|
||||||
|
defined(_M_X64)
|
||||||
|
TEST_F(LibYUVBaseTest, TestCpuId) {
|
||||||
|
int has_x86 = TestCpuFlag(kCpuHasX86);
|
||||||
|
if (has_x86) {
|
||||||
|
int cpu_info[4];
|
||||||
|
// Vendor ID:
|
||||||
|
// AuthenticAMD AMD processor
|
||||||
|
// CentaurHauls Centaur processor
|
||||||
|
// CyrixInstead Cyrix processor
|
||||||
|
// GenuineIntel Intel processor
|
||||||
|
// GenuineTMx86 Transmeta processor
|
||||||
|
// Geode by NSC National Semiconductor processor
|
||||||
|
// NexGenDriven NexGen processor
|
||||||
|
// RiseRiseRise Rise Technology processor
|
||||||
|
// SiS SiS SiS SiS processor
|
||||||
|
// UMC UMC UMC UMC processor
|
||||||
|
CpuId(0, 0, cpu_info);
|
||||||
|
cpu_info[0] = cpu_info[1]; // Reorder output
|
||||||
|
cpu_info[1] = cpu_info[3];
|
||||||
|
cpu_info[3] = 0;
|
||||||
|
printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n",
|
||||||
|
reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1],
|
||||||
|
cpu_info[2]);
|
||||||
|
EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
|
||||||
|
|
||||||
|
// CPU Family and Model
|
||||||
|
// 3:0 - Stepping
|
||||||
|
// 7:4 - Model
|
||||||
|
// 11:8 - Family
|
||||||
|
// 13:12 - Processor Type
|
||||||
|
// 19:16 - Extended Model
|
||||||
|
// 27:20 - Extended Family
|
||||||
|
CpuId(1, 0, cpu_info);
|
||||||
|
int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
||||||
|
int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
||||||
|
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
|
||||||
|
model);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
static void KernelVersion(int *version) {
|
static void KernelVersion(int *version) {
|
||||||
struct utsname buffer;
|
struct utsname buffer;
|
||||||
@ -131,37 +173,6 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
|||||||
defined(_M_IX86) || defined(_M_X64)
|
defined(_M_IX86) || defined(_M_X64)
|
||||||
int has_x86 = TestCpuFlag(kCpuHasX86);
|
int has_x86 = TestCpuFlag(kCpuHasX86);
|
||||||
if (has_x86) {
|
if (has_x86) {
|
||||||
int family, model, cpu_info[4];
|
|
||||||
// Vendor ID:
|
|
||||||
// AuthenticAMD AMD processor
|
|
||||||
// CentaurHauls Centaur processor
|
|
||||||
// CyrixInstead Cyrix processor
|
|
||||||
// GenuineIntel Intel processor
|
|
||||||
// GenuineTMx86 Transmeta processor
|
|
||||||
// Geode by NSC National Semiconductor processor
|
|
||||||
// NexGenDriven NexGen processor
|
|
||||||
// RiseRiseRise Rise Technology processor
|
|
||||||
// SiS SiS SiS SiS processor
|
|
||||||
// UMC UMC UMC UMC processor
|
|
||||||
CpuId(0, 0, &cpu_info[0]);
|
|
||||||
cpu_info[0] = cpu_info[1]; // Reorder output
|
|
||||||
cpu_info[1] = cpu_info[3];
|
|
||||||
cpu_info[3] = 0;
|
|
||||||
printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0]));
|
|
||||||
|
|
||||||
// CPU Family and Model
|
|
||||||
// 3:0 - Stepping
|
|
||||||
// 7:4 - Model
|
|
||||||
// 11:8 - Family
|
|
||||||
// 13:12 - Processor Type
|
|
||||||
// 19:16 - Extended Model
|
|
||||||
// 27:20 - Extended Family
|
|
||||||
CpuId(1, 0, &cpu_info[0]);
|
|
||||||
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
|
||||||
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
|
||||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
|
|
||||||
model, model);
|
|
||||||
|
|
||||||
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
||||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||||
int has_sse41 = TestCpuFlag(kCpuHasSSE41);
|
int has_sse41 = TestCpuFlag(kCpuHasSSE41);
|
||||||
@ -169,6 +180,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
|||||||
int has_avx = TestCpuFlag(kCpuHasAVX);
|
int has_avx = TestCpuFlag(kCpuHasAVX);
|
||||||
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
||||||
int has_erms = TestCpuFlag(kCpuHasERMS);
|
int has_erms = TestCpuFlag(kCpuHasERMS);
|
||||||
|
int has_fsmr = TestCpuFlag(kCpuHasFSMR);
|
||||||
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
|
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
|
||||||
int has_f16c = TestCpuFlag(kCpuHasF16C);
|
int has_f16c = TestCpuFlag(kCpuHasF16C);
|
||||||
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
|
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
|
||||||
@ -189,6 +201,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
|||||||
printf("Has AVX 0x%x\n", has_avx);
|
printf("Has AVX 0x%x\n", has_avx);
|
||||||
printf("Has AVX2 0x%x\n", has_avx2);
|
printf("Has AVX2 0x%x\n", has_avx2);
|
||||||
printf("Has ERMS 0x%x\n", has_erms);
|
printf("Has ERMS 0x%x\n", has_erms);
|
||||||
|
printf("Has FSMR 0x%x\n", has_fsmr);
|
||||||
printf("Has FMA3 0x%x\n", has_fma3);
|
printf("Has FMA3 0x%x\n", has_fma3);
|
||||||
printf("Has F16C 0x%x\n", has_f16c);
|
printf("Has F16C 0x%x\n", has_f16c);
|
||||||
printf("Has AVX512BW 0x%x\n", has_avx512bw);
|
printf("Has AVX512BW 0x%x\n", has_avx512bw);
|
||||||
@ -315,48 +328,6 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
|
|
||||||
defined(_M_X64)
|
|
||||||
TEST_F(LibYUVBaseTest, TestCpuId) {
|
|
||||||
int has_x86 = TestCpuFlag(kCpuHasX86);
|
|
||||||
if (has_x86) {
|
|
||||||
int cpu_info[4];
|
|
||||||
// Vendor ID:
|
|
||||||
// AuthenticAMD AMD processor
|
|
||||||
// CentaurHauls Centaur processor
|
|
||||||
// CyrixInstead Cyrix processor
|
|
||||||
// GenuineIntel Intel processor
|
|
||||||
// GenuineTMx86 Transmeta processor
|
|
||||||
// Geode by NSC National Semiconductor processor
|
|
||||||
// NexGenDriven NexGen processor
|
|
||||||
// RiseRiseRise Rise Technology processor
|
|
||||||
// SiS SiS SiS SiS processor
|
|
||||||
// UMC UMC UMC UMC processor
|
|
||||||
CpuId(0, 0, cpu_info);
|
|
||||||
cpu_info[0] = cpu_info[1]; // Reorder output
|
|
||||||
cpu_info[1] = cpu_info[3];
|
|
||||||
cpu_info[3] = 0;
|
|
||||||
printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n",
|
|
||||||
reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1],
|
|
||||||
cpu_info[2]);
|
|
||||||
EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
|
|
||||||
|
|
||||||
// CPU Family and Model
|
|
||||||
// 3:0 - Stepping
|
|
||||||
// 7:4 - Model
|
|
||||||
// 11:8 - Family
|
|
||||||
// 13:12 - Processor Type
|
|
||||||
// 19:16 - Extended Model
|
|
||||||
// 27:20 - Extended Family
|
|
||||||
CpuId(1, 0, cpu_info);
|
|
||||||
int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
|
||||||
int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
|
||||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
|
|
||||||
model);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static int FileExists(const char* file_name) {
|
static int FileExists(const char* file_name) {
|
||||||
FILE* f = fopen(file_name, "r");
|
FILE* f = fopen(file_name, "r");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
|
|||||||
@ -171,6 +171,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
int has_avx = TestCpuFlag(kCpuHasAVX);
|
int has_avx = TestCpuFlag(kCpuHasAVX);
|
||||||
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
||||||
int has_erms = TestCpuFlag(kCpuHasERMS);
|
int has_erms = TestCpuFlag(kCpuHasERMS);
|
||||||
|
int has_fsmr = TestCpuFlag(kCpuHasFSMR);
|
||||||
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
|
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
|
||||||
int has_f16c = TestCpuFlag(kCpuHasF16C);
|
int has_f16c = TestCpuFlag(kCpuHasF16C);
|
||||||
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
|
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user