From fdcf524aacb38b1e476f82abe5dc2e7dbf07dbc7 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Fri, 14 Oct 2016 16:34:08 -0700 Subject: [PATCH] Add f16c (halffloat) cpuid R=wangcheng@google.com, hubbe@chromium.org BUG=libyuv:560 Review URL: https://codereview.chromium.org/2418763006 . --- README.chromium | 2 +- include/libyuv/cpu_id.h | 6 ++++-- include/libyuv/version.h | 2 +- source/cpu_id.cc | 16 +++++++++++----- source/planar_functions.cc | 2 +- unit_test/cpu_test.cc | 2 ++ util/cpuid.c | 2 ++ 7 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.chromium b/README.chromium index acffcfd86..22c4937e3 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1626 +Version: 1627 License: BSD License File: LICENSE diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 7c6c9aeb0..bcddb32ea 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -31,13 +31,15 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; +static const int kCpuHasSSE42 = 0x100; // unused at this time. static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasAVX3 = 0x2000; -// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. +static const int kCpuHasF16C = 0x4000; + +// 0x8000 reserved for future X86 flags. // These flags are only valid on MIPS processors. static const int kCpuHasMIPS = 0x10000; diff --git a/include/libyuv/version.h b/include/libyuv/version.h index a2f6a656a..42835951f 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1626 +#define LIBYUV_VERSION 1627 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 27e2467b0..4fb168d74 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -229,19 +229,21 @@ int InitCpuFlags(void) { if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); } - cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | + cpu_info = kCpuHasX86 | + ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | - ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | - ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | - kCpuHasX86; + ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0); #ifdef HAS_XGETBV // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers - cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; + cpu_info |= kCpuHasAVX | + ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | + ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | + ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { @@ -281,6 +283,10 @@ int InitCpuFlags(void) { if (TestEnv("LIBYUV_DISABLE_AVX3")) { cpu_info &= ~kCpuHasAVX3; } + if (TestEnv("LIBYUV_DISABLE_F16C")) { + cpu_info &= ~kCpuHasF16C; + } + #endif #if defined(__mips__) && defined(__linux__) #if defined(__mips_dspr2) diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 3838759be..0bd76bc7c 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2570,7 +2570,7 @@ int HalfFloatPlane(const uint16* src_y, int src_stride_y, } #endif #if defined(HAS_HALFFLOATROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { + if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) { HalfFloatRow = HalfFloatRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { HalfFloatRow = HalfFloatRow_AVX2; diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 97e74d0a8..7438b95a6 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -45,6 +45,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has FMA3 %x\n", has_fma3); int has_avx3 = TestCpuFlag(kCpuHasAVX3); printf("Has AVX3 %x\n", has_avx3); + int has_f16c = TestCpuFlag(kCpuHasF16C); + printf("Has F16C %x\n", has_f16c); int has_mips = TestCpuFlag(kCpuHasMIPS); printf("Has MIPS %x\n", has_mips); int has_dspr2 = TestCpuFlag(kCpuHasDSPR2); diff --git a/util/cpuid.c b/util/cpuid.c index 94e245b11..9716f1157 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -79,6 +79,7 @@ int main(int argc, const char* argv[]) { int has_avx3 = TestCpuFlag(kCpuHasAVX3); int has_erms = TestCpuFlag(kCpuHasERMS); int has_fma3 = TestCpuFlag(kCpuHasFMA3); + int has_f16c = TestCpuFlag(kCpuHasF16C); printf("Has SSE2 %x\n", has_sse2); printf("Has SSSE3 %x\n", has_ssse3); printf("Has SSE4.1 %x\n", has_sse41); @@ -88,6 +89,7 @@ int main(int argc, const char* argv[]) { printf("Has AVX3 %x\n", has_avx3); printf("Has ERMS %x\n", has_erms); printf("Has FMA3 %x\n", has_fma3); + printf("Has F16C %x\n", has_f16c); } return 0; }