mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
Add hybrid detect for Intel laptop cpus
- Add +i8mm build option for sve ARGBToUV which uses usdot
- util/cpuid Get cpu count (windows, macos, linux)
- For each x86 cpu, detect hybrid (e-core)
- Includes a comment fix for ubsan unittest
- Bump version
- Apply clang format to util/*.c as well as all *.cc/*.h

Bug: 424637372
Change-Id: I08310e18051fff62c9e4e4a10d1e4361871119ac
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6635640
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
3d66e94fb5
commit
889613683a
2
BUILD.gn
2
BUILD.gn
@ -254,7 +254,7 @@ if (libyuv_use_sve) {
|
||||
public_configs = [ ":libyuv_config" ]
|
||||
|
||||
# SVE2 is an Armv9-A feature.
|
||||
cflags = [ "-march=armv9-a+sve2" ]
|
||||
cflags = [ "-march=armv9-a+sve2+i8mm" ]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||
Version: 1911
|
||||
Version: 1912
|
||||
License: BSD-3-Clause
|
||||
License File: LICENSE
|
||||
Shipped: yes
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1911
|
||||
#define LIBYUV_VERSION 1912
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -116,8 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) {
|
||||
uint32_t hash = seed;
|
||||
const uint32_t c16 = 0x92d9e201; // 33^16
|
||||
uint32_t tmp, tmp2;
|
||||
asm(
|
||||
"ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
|
||||
asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
|
||||
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"
|
||||
|
||||
// count is always a multiple of 16.
|
||||
|
||||
@ -2046,7 +2046,7 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
|
||||
ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
|
||||
} \
|
||||
ptrdiff_t np = n; \
|
||||
memcpy(vin, src_ptr, r * BPP); \
|
||||
memcpy(vin, src_ptr, r* BPP); \
|
||||
ANY_SIMD(vin, vout, MASK + 1); \
|
||||
memcpy(dst_ptr + np * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
|
||||
}
|
||||
|
||||
@ -1734,9 +1734,13 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||
|
||||
void OMITFP ARGBToUVMatrixRow_SSSE3(
|
||||
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
||||
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
|
||||
void OMITFP
|
||||
ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm3 \n"
|
||||
"movdqa %1,%%xmm4 \n"
|
||||
@ -1821,9 +1825,13 @@ static const UVMatrixConstants kShufARGBToUV_AVX = {
|
||||
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
|
||||
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128};
|
||||
|
||||
void OMITFP ARGBToUVMatrixRow_AVX2(
|
||||
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
||||
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
|
||||
void OMITFP
|
||||
ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
asm volatile(
|
||||
"vbroadcastf128 %0,%%ymm6 \n"
|
||||
"vbroadcastf128 %1,%%ymm7 \n"
|
||||
|
||||
@ -2731,9 +2731,9 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
|
||||
// Allocate one extra column so that the coalesce optimizations do not trigger
|
||||
// in convert_argb.cc (they are triggered only when stride is equal to width).
|
||||
const size_t kStride = kWidth + 1;
|
||||
align_buffer_page_end(orig_i400, (size_t) kWidth * kHeight);
|
||||
align_buffer_page_end(orig_i400, (size_t)kWidth * kHeight);
|
||||
ASSERT_NE(orig_i400, nullptr);
|
||||
align_buffer_page_end(dest_argb, (size_t) kWidth * kHeight * 4);
|
||||
align_buffer_page_end(dest_argb, (size_t)kWidth * kHeight * 4);
|
||||
ASSERT_NE(dest_argb, nullptr);
|
||||
for (int i = 0; i < kWidth * kHeight; ++i) {
|
||||
orig_i400[i] = i % 256;
|
||||
@ -2744,7 +2744,7 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
|
||||
free_aligned_buffer_page_end(dest_argb);
|
||||
free_aligned_buffer_page_end(orig_i400);
|
||||
}
|
||||
#endif // defined(_M_X64) || defined(_M_X64) || defined(__aarch64__)
|
||||
#endif // defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
|
||||
|
||||
#endif // !defined(LEAN_TESTS)
|
||||
|
||||
|
||||
59
util/cpuid.c
59
util/cpuid.c
@ -15,6 +15,13 @@
|
||||
#ifdef __linux__
|
||||
#include <ctype.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <unistd.h> // for sysconf
|
||||
#endif
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h> // for GetSystemInfo
|
||||
#endif
|
||||
#if defined(__APPLE__)
|
||||
#include <sys/sysctl.h> // for sysctlbyname
|
||||
#endif
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
@ -24,16 +31,16 @@ using namespace libyuv;
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
static void KernelVersion(int *version) {
|
||||
static void KernelVersion(int* version) {
|
||||
struct utsname buffer;
|
||||
int i = 0;
|
||||
|
||||
version[0] = version[1] = 0;
|
||||
if (uname(&buffer) == 0) {
|
||||
char *v = buffer.release;
|
||||
char* v = buffer.release;
|
||||
for (i = 0; *v && i < 2; ++v) {
|
||||
if (isdigit(*v)) {
|
||||
version[i++] = (int) strtol(v, &v, 10);
|
||||
version[i++] = (int)strtol(v, &v, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -51,6 +58,23 @@ int main(int argc, const char* argv[]) {
|
||||
printf("Kernel Version %d.%d\n", kernelversion[0], kernelversion[1]);
|
||||
}
|
||||
#endif // defined(__linux__)
|
||||
#if defined(_WIN32)
|
||||
SYSTEM_INFO sysInfo;
|
||||
GetSystemInfo(&sysInfo);
|
||||
int num_cpus = (int)sysInfo.dwNumberOfProcessors;
|
||||
#elif defined(__linux__)
|
||||
int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#elif defined(__APPLE__)
|
||||
int num_cpus = 0;
|
||||
size_t num_cpus_len = sizeof(num_cpus);
|
||||
// Get the number of logical CPU cores
|
||||
if (sysctlbyname("hw.logicalcpu", &num_cpus, &num_cpus_len, NULL, 0) == -1) {
|
||||
printf("sysctlbyname failed to get hw.logicalcpu\n");
|
||||
}
|
||||
#else
|
||||
int num_cpus = 0; // unknown OS
|
||||
#endif
|
||||
printf("Number of cpus: %d\n", num_cpus);
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
int has_arm = TestCpuFlag(kCpuHasARM);
|
||||
@ -75,7 +99,8 @@ int main(int argc, const char* argv[]) {
|
||||
// Read and print the SVE and SME vector lengths.
|
||||
if (has_sve) {
|
||||
int sve_vl;
|
||||
__asm__(".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||
__asm__(
|
||||
".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||
"mov %w[sve_vl], w0 \n"
|
||||
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
||||
:
|
||||
@ -84,7 +109,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
if (has_sme) {
|
||||
int sme_vl;
|
||||
__asm__(".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||
__asm__(
|
||||
".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||
"mov %w[sme_vl], w0 \n"
|
||||
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
||||
:
|
||||
@ -104,8 +130,8 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
// Read and print the RVV vector length.
|
||||
if (has_rvv) {
|
||||
register uint32_t vlenb __asm__ ("t0");
|
||||
__asm__(".word 0xC22022F3" /* CSRR t0, vlenb */ : "=r" (vlenb));
|
||||
register uint32_t vlenb __asm__("t0");
|
||||
__asm__(".word 0xC22022F3" /* CSRR t0, vlenb */ : "=r"(vlenb));
|
||||
printf("RVV vector length: %d bytes\n", vlenb);
|
||||
}
|
||||
}
|
||||
@ -131,8 +157,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
#endif // defined(__loongarch__)
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || \
|
||||
defined(_M_IX86) || defined(_M_X64)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
|
||||
defined(_M_X64)
|
||||
int has_x86 = TestCpuFlag(kCpuHasX86);
|
||||
if (has_x86) {
|
||||
int family, model, cpu_info[4];
|
||||
@ -153,6 +179,13 @@ int main(int argc, const char* argv[]) {
|
||||
cpu_info[3] = 0;
|
||||
printf("Cpu Vendor: %s\n", (char*)(&cpu_info[0]));
|
||||
|
||||
for (int n = 0; n < num_cpus; ++n) {
|
||||
// Check EDX bit 15 for hybrid design indication
|
||||
CpuId(7, n, &cpu_info[0]);
|
||||
int hybrid = (cpu_info[3] >> 15) & 1;
|
||||
printf(" Cpu %d Hybrid %d\n", n, hybrid);
|
||||
}
|
||||
|
||||
// CPU Family and Model
|
||||
// 3:0 - Stepping
|
||||
// 7:4 - Model
|
||||
@ -163,8 +196,8 @@ int main(int argc, const char* argv[]) {
|
||||
CpuId(1, 0, &cpu_info[0]);
|
||||
family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
||||
model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
|
||||
model, model);
|
||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
|
||||
model);
|
||||
|
||||
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||
@ -210,7 +243,7 @@ int main(int argc, const char* argv[]) {
|
||||
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
|
||||
printf("Has AMXINT8 0x%x\n", has_amxint8);
|
||||
}
|
||||
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
|
||||
// defined(_M_X64)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -76,10 +76,14 @@ int main(int argc, const char* argv[]) {
|
||||
//
|
||||
// // U and V contributions to R,G,B.
|
||||
|
||||
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64);
|
||||
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64);
|
||||
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64);
|
||||
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64);
|
||||
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
|
||||
ub * 64);
|
||||
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
|
||||
ug * 64);
|
||||
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
|
||||
vg * 64);
|
||||
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
|
||||
vr * 64);
|
||||
|
||||
vr = 255.f / 224.f * 2 * (1 - kr);
|
||||
ug = 255.f / 224.f * 2 * ((1 - kb) * kb / kg);
|
||||
@ -97,10 +101,14 @@ int main(int argc, const char* argv[]) {
|
||||
//
|
||||
// // U and V contributions to R,G,B.
|
||||
|
||||
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub, ub * 64);
|
||||
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug, ug * 64);
|
||||
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg, vg * 64);
|
||||
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr, vr * 64);
|
||||
printf("UB %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ub * 64), ub,
|
||||
ub * 64);
|
||||
printf("UG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(ug * 64), ug,
|
||||
ug * 64);
|
||||
printf("VG %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vg * 64), vg,
|
||||
vg * 64);
|
||||
printf("VR %-3.0f /* round(%f * 64 = %8.4f) */\n", round(vr * 64), vr,
|
||||
vr * 64);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user