Add hybrid detection for Intel laptop CPUs

- Add +i8mm build option for SVE ARGBToUV, which uses usdot
- util/cpuid: get CPU count (Windows, macOS, Linux)
- For each x86 CPU, detect hybrid (E-core)
- Includes a comment fix for ubsan unittest
- Bump version
- Apply clang format to util/*.c as well as all *.cc/*.h

Bug: 424637372
Change-Id: I08310e18051fff62c9e4e4a10d1e4361871119ac
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6635640
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2025-06-13 13:07:20 -07:00
parent 3d66e94fb5
commit 889613683a
10 changed files with 88 additions and 40 deletions

View File

@ -254,7 +254,7 @@ if (libyuv_use_sve) {
public_configs = [ ":libyuv_config" ]
# SVE2 is an Armv9-A feature.
cflags = [ "-march=armv9-a+sve2" ]
cflags = [ "-march=armv9-a+sve2+i8mm" ]
}
}

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1911
Version: 1912
License: BSD-3-Clause
License File: LICENSE
Shipped: yes

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1911
#define LIBYUV_VERSION 1912
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -116,8 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) {
uint32_t hash = seed;
const uint32_t c16 = 0x92d9e201; // 33^16
uint32_t tmp, tmp2;
asm(
"ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"
// count is always a multiple of 16.

View File

@ -1734,9 +1734,13 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
#ifdef HAS_ARGBTOUVROW_SSSE3
void OMITFP ARGBToUVMatrixRow_SSSE3(
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
void OMITFP
ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
asm volatile(
"movdqa %0,%%xmm3 \n"
"movdqa %1,%%xmm4 \n"
@ -1821,9 +1825,13 @@ static const UVMatrixConstants kShufARGBToUV_AVX = {
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128};
void OMITFP ARGBToUVMatrixRow_AVX2(
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
void OMITFP
ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
asm volatile(
"vbroadcastf128 %0,%%ymm6 \n"
"vbroadcastf128 %1,%%ymm7 \n"

View File

@ -2744,7 +2744,7 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
free_aligned_buffer_page_end(dest_argb);
free_aligned_buffer_page_end(orig_i400);
}
#endif // defined(_M_X64) || defined(_M_X64) || defined(__aarch64__)
#endif // defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
#endif // !defined(LEAN_TESTS)

View File

@ -15,6 +15,13 @@
#ifdef __linux__
#include <ctype.h>
#include <sys/utsname.h>
#include <unistd.h> // for sysconf
#endif
#if defined(_WIN32)
#include <windows.h> // for GetSystemInfo
#endif
#if defined(__APPLE__)
#include <sys/sysctl.h> // for sysctlbyname
#endif
#include "libyuv/cpu_id.h"
@ -51,6 +58,23 @@ int main(int argc, const char* argv[]) {
printf("Kernel Version %d.%d\n", kernelversion[0], kernelversion[1]);
}
#endif // defined(__linux__)
#if defined(_WIN32)
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
int num_cpus = (int)sysInfo.dwNumberOfProcessors;
#elif defined(__linux__)
int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(__APPLE__)
int num_cpus = 0;
size_t num_cpus_len = sizeof(num_cpus);
// Get the number of logical CPU cores
if (sysctlbyname("hw.logicalcpu", &num_cpus, &num_cpus_len, NULL, 0) == -1) {
printf("sysctlbyname failed to get hw.logicalcpu\n");
}
#else
int num_cpus = 0; // unknown OS
#endif
printf("Number of cpus: %d\n", num_cpus);
#if defined(__arm__) || defined(__aarch64__)
int has_arm = TestCpuFlag(kCpuHasARM);
@ -75,7 +99,8 @@ int main(int argc, const char* argv[]) {
// Read and print the SVE and SME vector lengths.
if (has_sve) {
int sve_vl;
__asm__(".inst 0x04bf5020 \n" // rdvl x0, #1
__asm__(
".inst 0x04bf5020 \n" // rdvl x0, #1
"mov %w[sve_vl], w0 \n"
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
:
@ -84,7 +109,8 @@ int main(int argc, const char* argv[]) {
}
if (has_sme) {
int sme_vl;
__asm__(".inst 0x04bf5820 \n" // rdsvl x0, #1
__asm__(
".inst 0x04bf5820 \n" // rdsvl x0, #1
"mov %w[sme_vl], w0 \n"
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
:
@ -131,8 +157,8 @@ int main(int argc, const char* argv[]) {
}
#endif // defined(__loongarch__)
#if defined(__i386__) || defined(__x86_64__) || \
defined(_M_IX86) || defined(_M_X64)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
defined(_M_X64)
int has_x86 = TestCpuFlag(kCpuHasX86);
if (has_x86) {
int family, model, cpu_info[4];
@ -153,6 +179,13 @@ int main(int argc, const char* argv[]) {
cpu_info[3] = 0;
printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0]));
for (int n = 0; n < num_cpus; ++n) {
// Check EDX bit 15 for hybrid design indication
CpuId(7, n, &cpu_info[0]);
int hybrid = (cpu_info[3] >> 15) & 1;
printf(" Cpu %d Hybrid %d\n", n, hybrid);
}
// CPU Family and Model
// 3:0 - Stepping
// 7:4 - Model
@ -163,8 +196,8 @@ int main(int argc, const char* argv[]) {
CpuId(1, 0, &cpu_info[0]);
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
model, model);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
model);
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
@ -210,7 +243,7 @@ int main(int argc, const char* argv[]) {
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
printf("Has AMXINT8 0x%x\n", has_amxint8);
}
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
// defined(_M_X64)
return 0;
}

View File

@ -76,10 +76,14 @@ int main(int argc, const char* argv[]) {
//
// // U and V contributions to R,G,B.
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64);
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64);
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64);
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
ub * 64);
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
ug * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
vg * 64);
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
vr * 64);
vr = 255.f / 224.f * 2 * (1 - kr);
ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg);
@ -97,10 +101,14 @@ int main(int argc, const char* argv[]) {
//
// // U and V contributions to R,G,B.
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64);
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64);
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64);
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
ub * 64);
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
ug * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
vg * 64);
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
vr * 64);
return 0;
}