diff --git a/README.chromium b/README.chromium index 0412b0daf..143508ad0 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 373 +Version: 374 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index b8fe8d8dd..68317b35a 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 373 +#define LIBYUV_VERSION 374 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index 66127e9dd..ca2bf7fa9 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -962,8 +962,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, } } } -#endif -#if defined(HAS_UYVYTOYROW_NEON) +#elif defined(HAS_UYVYTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { if (width > 8) { UYVYToYRow = UYVYToYRow_Any_NEON; diff --git a/source/convert_from.cc b/source/convert_from.cc index f5b9dda3e..839587798 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -998,8 +998,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, ARGBToRAWRow = ARGBToRAWRow_SSSE3; } } -#endif -#if defined(HAS_ARGBTORAWROW_NEON) +#elif defined(HAS_ARGBTORAWROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { if (width * 3 <= kMaxStride) { ARGBToRAWRow = ARGBToRAWRow_Any_NEON; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 61c2f93de..9e7ac55c4 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -58,18 +58,36 @@ void CpuId(int cpu_info[4], int) { } #endif -// Low level cpuid for X86. Returns zeros on other CPUs. -#if !defined(__CLR_VER) && defined(_M_IX86) -// TODO(fbarchard): Port to GCC and 64 bit Visual C. +// X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers. 
+#if !defined(__CLR_VER) && defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) #define HAS_XGETBV -// Return low 32 bits of BV - OS support for register saving. -__declspec(naked) -static uint32 XGetBV32(void) { - _asm _emit 0x0f - _asm _emit 0x01 - _asm _emit 0xd0 // xgetbv - _asm ret +static uint32 XGetBV(unsigned int xcr) { + return static_cast<uint32>(_xgetbv(xcr)); } +#elif !defined(__CLR_VER) && defined(_M_IX86) +#define HAS_XGETBV +__declspec(naked) __declspec(align(16)) +static uint32 XGetBV(unsigned int xcr) { + __asm { + mov ecx, [esp + 4] // xcr + xgetbv // updates eax and edx. edx unused. + ret + } +} +#elif defined(__i386__) || defined(__x86_64__) +#define HAS_XGETBV +static uint32 XGetBV(unsigned int xcr) { + uint32 xcr_feature_mask; + asm volatile ( + ".byte 0x0f, 0x01, 0xd0\n" + : "=a"(xcr_feature_mask) + : "c"(xcr) + : "memory", "cc", "edx"); // edx unused. + return xcr_feature_mask; +} +#endif +#ifdef HAS_XGETBV +static const int kXCR_XFEATURE_ENABLED_MASK = 0; #endif // based on libvpx arm_cpudetect.c @@ -105,13 +123,13 @@ int InitCpuFlags() { ((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) | - // TODO(fbarchard): AVX test BV same as AVX2. (((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) | kCpuInitialized | kCpuHasX86; #ifdef HAS_XGETBV if (cpu_info_ & kCpuHasAVX) { __cpuid(cpu_info, 7); - if ((cpu_info[1] & 0x00000020) && ((XGetBV32() & 0x06) == 0x06)) { + if ((cpu_info[1] & 0x00000020) && + ((XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06)) { cpu_info_ |= kCpuHasAVX2; } } diff --git a/source/row_neon.cc b/source/row_neon.cc index 465c5e737..6988766be 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -459,7 +459,7 @@ void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) { "1: \n" "vld4.8 {d5, d6, d7, d8}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. 
- "vmov.u8 d4, d8 \n" + "vmov.u8 d4, d8 \n" // move A before RGB. "vst4.8 {d4, d5, d6, d7}, [%1]! \n" // store 8 pixels of RGBA. "bgt 1b \n" : "+r"(src_argb), // %0