mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Port xgetbv to posix
BUG=98
TEST=none
Review URL: https://webrtc-codereview.appspot.com/848004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@374 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
78070bc1ba
commit
4c32b3a017
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 373
|
||||
Version: 374
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 373
|
||||
#define LIBYUV_VERSION 374
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -962,8 +962,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_UYVYTOYROW_NEON)
|
||||
#elif defined(HAS_UYVYTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
if (width > 8) {
|
||||
UYVYToYRow = UYVYToYRow_Any_NEON;
|
||||
|
||||
@ -998,8 +998,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORAWROW_NEON)
|
||||
#elif defined(HAS_ARGBTORAWROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
if (width * 3 <= kMaxStride) {
|
||||
ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
|
||||
|
||||
@ -58,18 +58,36 @@ void CpuId(int cpu_info[4], int) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Low level cpuid for X86. Returns zeros on other CPUs.
|
||||
#if !defined(__CLR_VER) && defined(_M_IX86)
|
||||
// TODO(fbarchard): Port to GCC and 64 bit Visual C.
|
||||
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
|
||||
#if !defined(__CLR_VER) && defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
#define HAS_XGETBV
|
||||
// Return low 32 bits of BV - OS support for register saving.
|
||||
__declspec(naked)
|
||||
static uint32 XGetBV32(void) {
|
||||
_asm _emit 0x0f
|
||||
_asm _emit 0x01
|
||||
_asm _emit 0xd0 // xgetbv
|
||||
_asm ret
|
||||
static uint32 XGetBV(unsigned int xcr) {
|
||||
return static_cast<uint32>(_xgetbv(xcr));
|
||||
}
|
||||
#elif !defined(__CLR_VER) && defined(_M_IX86)
|
||||
#define HAS_XGETBV
|
||||
__declspec(naked) __declspec(align(16))
|
||||
static uint32 XGetBV(unsigned int xcr) {
|
||||
__asm {
|
||||
mov ecx, [esp + 4] // xcr
|
||||
xgetbv // updates eax and edx. edx unused.
|
||||
ret
|
||||
}
|
||||
}
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
#define HAS_XGETBV
|
||||
static uint32 XGetBV(unsigned int xcr) {
|
||||
uint32 xcr_feature_mask;
|
||||
asm volatile (
|
||||
".byte 0x0f, 0x01, 0xd0\n"
|
||||
: "=a"(xcr_feature_mask)
|
||||
: "c"(xcr)
|
||||
: "memory", "cc", "edx"); // edx unused.
|
||||
return xcr_feature_mask;
|
||||
}
|
||||
#endif
|
||||
#ifdef HAS_XGETBV
|
||||
static const int kXCR_XFEATURE_ENABLED_MASK = 0;
|
||||
#endif
|
||||
|
||||
// based on libvpx arm_cpudetect.c
|
||||
@ -105,13 +123,13 @@ int InitCpuFlags() {
|
||||
((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
||||
((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
|
||||
((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
|
||||
// TODO(fbarchard): AVX test BV same as AVX2.
|
||||
(((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) |
|
||||
kCpuInitialized | kCpuHasX86;
|
||||
#ifdef HAS_XGETBV
|
||||
if (cpu_info_ & kCpuHasAVX) {
|
||||
__cpuid(cpu_info, 7);
|
||||
if ((cpu_info[1] & 0x00000020) && ((XGetBV32() & 0x06) == 0x06)) {
|
||||
if ((cpu_info[1] & 0x00000020) &&
|
||||
((XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06)) {
|
||||
cpu_info_ |= kCpuHasAVX2;
|
||||
}
|
||||
}
|
||||
|
||||
@ -459,7 +459,7 @@ void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) {
|
||||
"1: \n"
|
||||
"vld4.8 {d5, d6, d7, d8}, [%0]! \n" // load 8 pixels of ARGB.
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vmov.u8 d4, d8 \n"
|
||||
"vmov.u8 d4, d8 \n" // move A before RGB.
|
||||
"vst4.8 {d4, d5, d6, d7}, [%1]! \n" // store 8 pixels of RGBA.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user