mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Add hybrid detect for Intel laptop cpus
- Add +i8mm build option for sve ARGBToUV which uses usdot
- util/cpuid Get cpu count (windows, macos, linux)
- For each x86 cpu, detect hybrid (e-core)
- Includes a comment fix for ubsan unittest
- Bump version
- Apply clang format to util/*.c as well as all *.cc/*.h

Bug: 424637372
Change-Id: I08310e18051fff62c9e4e4a10d1e4361871119ac
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6635640
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
3d66e94fb5
commit
889613683a
2
BUILD.gn
2
BUILD.gn
@ -254,7 +254,7 @@ if (libyuv_use_sve) {
|
|||||||
public_configs = [ ":libyuv_config" ]
|
public_configs = [ ":libyuv_config" ]
|
||||||
|
|
||||||
# SVE2 is an Armv9-A feature.
|
# SVE2 is an Armv9-A feature.
|
||||||
cflags = [ "-march=armv9-a+sve2" ]
|
cflags = [ "-march=armv9-a+sve2+i8mm" ]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||||
Version: 1911
|
Version: 1912
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
Shipped: yes
|
Shipped: yes
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1911
|
#define LIBYUV_VERSION 1912
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -116,8 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) {
|
|||||||
uint32_t hash = seed;
|
uint32_t hash = seed;
|
||||||
const uint32_t c16 = 0x92d9e201; // 33^16
|
const uint32_t c16 = 0x92d9e201; // 33^16
|
||||||
uint32_t tmp, tmp2;
|
uint32_t tmp, tmp2;
|
||||||
asm(
|
asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
|
||||||
"ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
|
|
||||||
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"
|
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"
|
||||||
|
|
||||||
// count is always a multiple of 16.
|
// count is always a multiple of 16.
|
||||||
|
|||||||
@ -1734,9 +1734,13 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||||
|
|
||||||
void OMITFP ARGBToUVMatrixRow_SSSE3(
|
void OMITFP
|
||||||
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||||
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
|
int src_stride_argb,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width,
|
||||||
|
const struct RgbUVConstants* rgbuvconstants) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movdqa %0,%%xmm3 \n"
|
"movdqa %0,%%xmm3 \n"
|
||||||
"movdqa %1,%%xmm4 \n"
|
"movdqa %1,%%xmm4 \n"
|
||||||
@ -1821,9 +1825,13 @@ static const UVMatrixConstants kShufARGBToUV_AVX = {
|
|||||||
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
|
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
|
||||||
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128};
|
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128};
|
||||||
|
|
||||||
void OMITFP ARGBToUVMatrixRow_AVX2(
|
void OMITFP
|
||||||
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
|
int src_stride_argb,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width,
|
||||||
|
const struct RgbUVConstants* rgbuvconstants) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vbroadcastf128 %0,%%ymm6 \n"
|
"vbroadcastf128 %0,%%ymm6 \n"
|
||||||
"vbroadcastf128 %1,%%ymm7 \n"
|
"vbroadcastf128 %1,%%ymm7 \n"
|
||||||
|
|||||||
@ -2744,7 +2744,7 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
|
|||||||
free_aligned_buffer_page_end(dest_argb);
|
free_aligned_buffer_page_end(dest_argb);
|
||||||
free_aligned_buffer_page_end(orig_i400);
|
free_aligned_buffer_page_end(orig_i400);
|
||||||
}
|
}
|
||||||
#endif // defined(_M_X64) || defined(_M_X64) || defined(__aarch64__)
|
#endif // defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
|
||||||
|
|
||||||
#endif // !defined(LEAN_TESTS)
|
#endif // !defined(LEAN_TESTS)
|
||||||
|
|
||||||
|
|||||||
49
util/cpuid.c
49
util/cpuid.c
@ -15,6 +15,13 @@
|
|||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <sys/utsname.h>
|
#include <sys/utsname.h>
|
||||||
|
#include <unistd.h> // for sysconf
|
||||||
|
#endif
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#include <windows.h> // for GetSystemInfo
|
||||||
|
#endif
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
#include <sys/sysctl.h> // for sysctlbyname
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
@ -51,6 +58,23 @@ int main(int argc, const char* argv[]) {
|
|||||||
printf("Kernel Version %d.%d\n", kernelversion[0], kernelversion[1]);
|
printf("Kernel Version %d.%d\n", kernelversion[0], kernelversion[1]);
|
||||||
}
|
}
|
||||||
#endif // defined(__linux__)
|
#endif // defined(__linux__)
|
||||||
|
#if defined(_WIN32)
|
||||||
|
SYSTEM_INFO sysInfo;
|
||||||
|
GetSystemInfo(&sysInfo);
|
||||||
|
int num_cpus = (int)sysInfo.dwNumberOfProcessors;
|
||||||
|
#elif defined(__linux__)
|
||||||
|
int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
int num_cpus = 0;
|
||||||
|
size_t num_cpus_len = sizeof(num_cpus);
|
||||||
|
// Get the number of logical CPU cores
|
||||||
|
if (sysctlbyname("hw.logicalcpu", &num_cpus, &num_cpus_len, NULL, 0) == -1) {
|
||||||
|
printf("sysctlbyname failed to get hw.logicalcpu\n");
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
int num_cpus = 0; // unknown OS
|
||||||
|
#endif
|
||||||
|
printf("Number of cpus: %d\n", num_cpus);
|
||||||
|
|
||||||
#if defined(__arm__) || defined(__aarch64__)
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
int has_arm = TestCpuFlag(kCpuHasARM);
|
int has_arm = TestCpuFlag(kCpuHasARM);
|
||||||
@ -75,7 +99,8 @@ int main(int argc, const char* argv[]) {
|
|||||||
// Read and print the SVE and SME vector lengths.
|
// Read and print the SVE and SME vector lengths.
|
||||||
if (has_sve) {
|
if (has_sve) {
|
||||||
int sve_vl;
|
int sve_vl;
|
||||||
__asm__(".inst 0x04bf5020 \n" // rdvl x0, #1
|
__asm__(
|
||||||
|
".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||||
"mov %w[sve_vl], w0 \n"
|
"mov %w[sve_vl], w0 \n"
|
||||||
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
||||||
:
|
:
|
||||||
@ -84,7 +109,8 @@ int main(int argc, const char* argv[]) {
|
|||||||
}
|
}
|
||||||
if (has_sme) {
|
if (has_sme) {
|
||||||
int sme_vl;
|
int sme_vl;
|
||||||
__asm__(".inst 0x04bf5820 \n" // rdsvl x0, #1
|
__asm__(
|
||||||
|
".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||||
"mov %w[sme_vl], w0 \n"
|
"mov %w[sme_vl], w0 \n"
|
||||||
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
||||||
:
|
:
|
||||||
@ -131,8 +157,8 @@ int main(int argc, const char* argv[]) {
|
|||||||
}
|
}
|
||||||
#endif // defined(__loongarch__)
|
#endif // defined(__loongarch__)
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__) || \
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
|
||||||
defined(_M_IX86) || defined(_M_X64)
|
defined(_M_X64)
|
||||||
int has_x86 = TestCpuFlag(kCpuHasX86);
|
int has_x86 = TestCpuFlag(kCpuHasX86);
|
||||||
if (has_x86) {
|
if (has_x86) {
|
||||||
int family, model, cpu_info[4];
|
int family, model, cpu_info[4];
|
||||||
@ -153,6 +179,13 @@ int main(int argc, const char* argv[]) {
|
|||||||
cpu_info[3] = 0;
|
cpu_info[3] = 0;
|
||||||
printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0]));
|
printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0]));
|
||||||
|
|
||||||
|
for (int n = 0; n < num_cpus; ++n) {
|
||||||
|
// Check EDX bit 15 for hybrid design indication
|
||||||
|
CpuId(7, n, &cpu_info[0]);
|
||||||
|
int hybrid = (cpu_info[3] >> 15) & 1;
|
||||||
|
printf(" Cpu %d Hybrid %d\n", n, hybrid);
|
||||||
|
}
|
||||||
|
|
||||||
// CPU Family and Model
|
// CPU Family and Model
|
||||||
// 3:0 - Stepping
|
// 3:0 - Stepping
|
||||||
// 7:4 - Model
|
// 7:4 - Model
|
||||||
@ -163,8 +196,8 @@ int main(int argc, const char* argv[]) {
|
|||||||
CpuId(1, 0, &cpu_info[0]);
|
CpuId(1, 0, &cpu_info[0]);
|
||||||
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
||||||
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
||||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
|
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
|
||||||
model, model);
|
model);
|
||||||
|
|
||||||
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
||||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||||
@ -210,7 +243,7 @@ int main(int argc, const char* argv[]) {
|
|||||||
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
|
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
|
||||||
printf("Has AMXINT8 0x%x\n", has_amxint8);
|
printf("Has AMXINT8 0x%x\n", has_amxint8);
|
||||||
}
|
}
|
||||||
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
|
||||||
|
// defined(_M_X64)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -76,10 +76,14 @@ int main(int argc, const char* argv[]) {
|
|||||||
//
|
//
|
||||||
// // U and V contributions to R,G,B.
|
// // U and V contributions to R,G,B.
|
||||||
|
|
||||||
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64);
|
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
|
||||||
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64);
|
ub * 64);
|
||||||
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64);
|
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
|
||||||
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64);
|
ug * 64);
|
||||||
|
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
|
||||||
|
vg * 64);
|
||||||
|
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
|
||||||
|
vr * 64);
|
||||||
|
|
||||||
vr = 255.f / 224.f * 2 * (1 - kr);
|
vr = 255.f / 224.f * 2 * (1 - kr);
|
||||||
ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg);
|
ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg);
|
||||||
@ -97,10 +101,14 @@ int main(int argc, const char* argv[]) {
|
|||||||
//
|
//
|
||||||
// // U and V contributions to R,G,B.
|
// // U and V contributions to R,G,B.
|
||||||
|
|
||||||
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64);
|
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
|
||||||
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64);
|
ub * 64);
|
||||||
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64);
|
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
|
||||||
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64);
|
ug * 64);
|
||||||
|
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
|
||||||
|
vg * 64);
|
||||||
|
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
|
||||||
|
vr * 64);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user