Add hybrid detection for Intel laptop CPUs

- Add +i8mm build option for the SVE ARGBToUV, which uses usdot
- util/cpuid: Get CPU count (Windows, macOS, Linux)
- For each x86 CPU, detect hybrid (E-core)
- Includes a comment fix for ubsan unittest
- Bump version
- Apply clang-format to util/*.c as well as all *.cc/*.h

Bug: 424637372
Change-Id: I08310e18051fff62c9e4e4a10d1e4361871119ac
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6635640
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2025-06-13 13:07:20 -07:00
parent 3d66e94fb5
commit 889613683a
10 changed files with 88 additions and 40 deletions

View File

@ -254,7 +254,7 @@ if (libyuv_use_sve) {
public_configs = [ ":libyuv_config" ] public_configs = [ ":libyuv_config" ]
# SVE2 is an Armv9-A feature. # SVE2 is an Armv9-A feature.
cflags = [ "-march=armv9-a+sve2" ] cflags = [ "-march=armv9-a+sve2+i8mm" ]
} }
} }

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/ URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1911 Version: 1912
License: BSD-3-Clause License: BSD-3-Clause
License File: LICENSE License File: LICENSE
Shipped: yes Shipped: yes

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1911 #define LIBYUV_VERSION 1912
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -116,8 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) {
uint32_t hash = seed; uint32_t hash = seed;
const uint32_t c16 = 0x92d9e201; // 33^16 const uint32_t c16 = 0x92d9e201; // 33^16
uint32_t tmp, tmp2; uint32_t tmp, tmp2;
asm( asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
"ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n" "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"
// count is always a multiple of 16. // count is always a multiple of 16.

View File

@ -1734,9 +1734,13 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
#ifdef HAS_ARGBTOUVROW_SSSE3 #ifdef HAS_ARGBTOUVROW_SSSE3
void OMITFP ARGBToUVMatrixRow_SSSE3( void OMITFP
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) { int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
asm volatile( asm volatile(
"movdqa %0,%%xmm3 \n" "movdqa %0,%%xmm3 \n"
"movdqa %1,%%xmm4 \n" "movdqa %1,%%xmm4 \n"
@ -1821,9 +1825,13 @@ static const UVMatrixConstants kShufARGBToUV_AVX = {
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128}; 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128};
void OMITFP ARGBToUVMatrixRow_AVX2( void OMITFP
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) { int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
asm volatile( asm volatile(
"vbroadcastf128 %0,%%ymm6 \n" "vbroadcastf128 %0,%%ymm6 \n"
"vbroadcastf128 %1,%%ymm7 \n" "vbroadcastf128 %1,%%ymm7 \n"

View File

@ -2744,7 +2744,7 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
free_aligned_buffer_page_end(dest_argb); free_aligned_buffer_page_end(dest_argb);
free_aligned_buffer_page_end(orig_i400); free_aligned_buffer_page_end(orig_i400);
} }
#endif // defined(_M_X64) || defined(_M_X64) || defined(__aarch64__) #endif // defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
#endif // !defined(LEAN_TESTS) #endif // !defined(LEAN_TESTS)

View File

@ -15,6 +15,13 @@
#ifdef __linux__ #ifdef __linux__
#include <ctype.h> #include <ctype.h>
#include <sys/utsname.h> #include <sys/utsname.h>
#include <unistd.h> // for sysconf
#endif
#if defined(_WIN32)
#include <windows.h> // for GetSystemInfo
#endif
#if defined(__APPLE__)
#include <sys/sysctl.h> // for sysctlbyname
#endif #endif
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
@ -51,6 +58,23 @@ int main(int argc, const char* argv[]) {
printf("Kernel Version %d.%d\n", kernelversion[0], kernelversion[1]); printf("Kernel Version %d.%d\n", kernelversion[0], kernelversion[1]);
} }
#endif // defined(__linux__) #endif // defined(__linux__)
#if defined(_WIN32)
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
int num_cpus = (int)sysInfo.dwNumberOfProcessors;
#elif defined(__linux__)
int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(__APPLE__)
int num_cpus = 0;
size_t num_cpus_len = sizeof(num_cpus);
// Get the number of logical CPU cores
if (sysctlbyname("hw.logicalcpu", &num_cpus, &num_cpus_len, NULL, 0) == -1) {
printf("sysctlbyname failed to get hw.logicalcpu\n");
}
#else
int num_cpus = 0; // unknown OS
#endif
printf("Number of cpus: %d\n", num_cpus);
#if defined(__arm__) || defined(__aarch64__) #if defined(__arm__) || defined(__aarch64__)
int has_arm = TestCpuFlag(kCpuHasARM); int has_arm = TestCpuFlag(kCpuHasARM);
@ -75,7 +99,8 @@ int main(int argc, const char* argv[]) {
// Read and print the SVE and SME vector lengths. // Read and print the SVE and SME vector lengths.
if (has_sve) { if (has_sve) {
int sve_vl; int sve_vl;
__asm__(".inst 0x04bf5020 \n" // rdvl x0, #1 __asm__(
".inst 0x04bf5020 \n" // rdvl x0, #1
"mov %w[sve_vl], w0 \n" "mov %w[sve_vl], w0 \n"
: [sve_vl] "=r"(sve_vl) // %[sve_vl] : [sve_vl] "=r"(sve_vl) // %[sve_vl]
: :
@ -84,7 +109,8 @@ int main(int argc, const char* argv[]) {
} }
if (has_sme) { if (has_sme) {
int sme_vl; int sme_vl;
__asm__(".inst 0x04bf5820 \n" // rdsvl x0, #1 __asm__(
".inst 0x04bf5820 \n" // rdsvl x0, #1
"mov %w[sme_vl], w0 \n" "mov %w[sme_vl], w0 \n"
: [sme_vl] "=r"(sme_vl) // %[sme_vl] : [sme_vl] "=r"(sme_vl) // %[sme_vl]
: :
@ -131,8 +157,8 @@ int main(int argc, const char* argv[]) {
} }
#endif // defined(__loongarch__) #endif // defined(__loongarch__)
#if defined(__i386__) || defined(__x86_64__) || \ #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
defined(_M_IX86) || defined(_M_X64) defined(_M_X64)
int has_x86 = TestCpuFlag(kCpuHasX86); int has_x86 = TestCpuFlag(kCpuHasX86);
if (has_x86) { if (has_x86) {
int family, model, cpu_info[4]; int family, model, cpu_info[4];
@ -153,6 +179,13 @@ int main(int argc, const char* argv[]) {
cpu_info[3] = 0; cpu_info[3] = 0;
printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0])); printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0]));
for (int n = 0; n < num_cpus; ++n) {
// Check EDX bit 15 for hybrid design indication
CpuId(7, n, &cpu_info[0]);
int hybrid = (cpu_info[3] >> 15) & 1;
printf(" Cpu %d Hybrid %d\n", n, hybrid);
}
// CPU Family and Model // CPU Family and Model
// 3:0 - Stepping // 3:0 - Stepping
// 7:4 - Model // 7:4 - Model
@ -163,8 +196,8 @@ int main(int argc, const char* argv[]) {
CpuId(1, 0, &cpu_info[0]); CpuId(1, 0, &cpu_info[0]);
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0); family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0); model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
model, model); model);
int has_sse2 = TestCpuFlag(kCpuHasSSE2); int has_sse2 = TestCpuFlag(kCpuHasSSE2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
@ -210,7 +243,7 @@ int main(int argc, const char* argv[]) {
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
printf("Has AMXINT8 0x%x\n", has_amxint8); printf("Has AMXINT8 0x%x\n", has_amxint8);
} }
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) #endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
// defined(_M_X64)
return 0; return 0;
} }

View File

@ -76,10 +76,14 @@ int main(int argc, const char* argv[]) {
// //
// // U and V contributions to R,G,B. // // U and V contributions to R,G,B.
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64); printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64); ub * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64); printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64); ug * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
vg * 64);
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
vr * 64);
vr = 255.f / 224.f * 2 * (1 - kr); vr = 255.f / 224.f * 2 * (1 - kr);
ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg); ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg);
@ -97,10 +101,14 @@ int main(int argc, const char* argv[]) {
// //
// // U and V contributions to R,G,B. // // U and V contributions to R,G,B.
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64); printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64); ub * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64); printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64); ug * 64);
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
vg * 64);
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
vr * 64);
return 0; return 0;
} }