From c5d44a0c5e3f67ebd64626a92d71478876ba4ff2 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 5 Apr 2012 20:39:42 +0000 Subject: [PATCH] cpuid add sse41 and x86 BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/485001 git-svn-id: http://libyuv.googlecode.com/svn/trunk@236 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/cpu_id.h | 10 +++++---- include/libyuv/version.h | 2 +- source/cpu_id.cc | 10 +++++++-- source/planar_functions.cc | 46 +++++++++++++++++++------------------- source/row.h | 2 ++ unit_test/cpu_test.cc | 25 +++++++++++++++++---- 7 files changed, 62 insertions(+), 35 deletions(-) diff --git a/README.chromium b/README.chromium index dc95dc2ce..2fa724657 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 235 +Version: 236 License: BSD License File: LICENSE diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 8fb572f61..d3adf51ad 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -17,14 +17,16 @@ extern "C" { #endif // These flags are only valid on x86 processors -static const int kCpuHasSSE2 = 1; -static const int kCpuHasSSSE3 = 2; +static const int kCpuHasX86 = 1; +static const int kCpuHasSSE2 = 2; +static const int kCpuHasSSSE3 = 4; +static const int kCpuHasSSE41 = 8; // These flags are only valid on ARM processors -static const int kCpuHasNEON = 4; +static const int kCpuHasNEON = 16; // Internal flag to indicate cpuid is initialized. -static const int kCpuInitialized = 8; +static const int kCpuInitialized = 32; // Detect CPU has SSE2 etc. // test_flag parameter should be one of kCpuHas constants above diff --git a/include/libyuv/version.h b/include/libyuv/version.h index b1a23d415..a7b4da6f4 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 235 +#define LIBYUV_VERSION 236 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/cpu_id.cc b/source/cpu_id.cc index ab8a2c261..63fedcb80 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -75,16 +75,22 @@ int InitCpuFlags() { __cpuid(cpu_info, 1); cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) | (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) | - kCpuInitialized; + (cpu_info[2] & 0x00080000 ? kCpuHasSSE41 : 0) | + kCpuInitialized | kCpuHasX86; // environment variable overrides for testing. + if (getenv("LIBYUV_DISABLE_X86")) { + cpu_info_ &= ~kCpuHasX86; + } if (getenv("LIBYUV_DISABLE_SSE2")) { cpu_info_ &= ~kCpuHasSSE2; } - // environment variable overrides for testing. if (getenv("LIBYUV_DISABLE_SSSE3")) { cpu_info_ &= ~kCpuHasSSSE3; } + if (getenv("LIBYUV_DISABLE_SSE41")) { + cpu_info_ &= ~kCpuHasSSE41; + } #elif defined(__linux__) && defined(__ARM_NEON__) cpu_info_ = ArmCpuCaps("/proc/cpuinfo") | kCpuInitialized; #elif defined(__ARM_NEON__) diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 2bc3e3fe2..455722d08 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -29,17 +29,17 @@ void CopyPlane(const uint8* src_y, int src_stride_y, if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { CopyRow = CopyRow_NEON; } -#elif defined(HAS_COPYROW_X86) - if (IS_ALIGNED(width, 4)) { - CopyRow = CopyRow_X86; -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 32) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - CopyRow = CopyRow_SSE2; - } #endif +#if defined(HAS_COPYROW_X86) + if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { + CopyRow = CopyRow_X86; + } +#endif +#if defined(HAS_COPYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + CopyRow = CopyRow_SSE2; } #endif @@ -755,7 +755,6 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, } #endif -#if !defined(HAS_SETROW_X86) static void SetRow8_C(uint8* dst, uint32 v8, int count) { #ifdef _MSC_VER for (int x = 0; x < count; ++x) { @@ -776,24 +775,24 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width, dst += dst_stride; } } -#endif void SetPlane(uint8* dst_y, int dst_stride_y, int width, int height, uint32 value) { -#if defined(HAS_SETROW_X86) - void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_X86; -#else void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C; -#endif - #if defined(HAS_SETROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { SetRow = SetRow8_NEON; } -#elif defined(HAS_SETROW_SSE2) +#endif +#if defined(HAS_SETROW_X86) + if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { + SetRow = SetRow8_X86; + } +#endif +#if defined(HAS_SETROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { @@ -836,7 +835,6 @@ int I420Rect(uint8* dst_y, int dst_stride_y, return 0; } -// TODO(fbarchard): Add TestCpuFlag(kCpuHasX86) to allow C code to be tested. // Draw a rectangle into ARGB int ARGBRect(uint8* dst_argb, int dst_stride_argb, int dst_x, int dst_y, @@ -848,9 +846,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, return -1; } uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; -#if defined(HAS_SETROW_X86) - SetRows32_X86(dst, value, width, dst_stride_argb, height); -#else #if defined(HAS_SETROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) { @@ -858,8 +853,13 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, return 0; } #endif - SetRows32_C(dst, value, width, dst_stride_argb, height); +#if defined(HAS_SETROW_X86) + if (TestCpuFlag(kCpuHasX86)) { + SetRows32_X86(dst, value, width, dst_stride_argb, height); + return 0; + } #endif + SetRows32_C(dst, value, width, dst_stride_argb, height); return 0; } diff --git a/source/row.h b/source/row.h index 4ed17a096..46fda7acb 100644 --- a/source/row.h +++ b/source/row.h @@ -89,11 +89,13 @@ extern "C" { typedef __declspec(align(16)) int8 vec8[16]; typedef __declspec(align(16)) uint8 uvec8[16]; typedef __declspec(align(16)) int16 vec16[8]; +typedef __declspec(align(16)) int32 vec32[4]; #else // __GNUC__ #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) typedef int8 __attribute__((vector_size(16))) vec8; typedef uint8 __attribute__((vector_size(16))) uvec8; typedef int16 __attribute__((vector_size(16))) vec16; +typedef int32 __attribute__((vector_size(16))) vec32; #endif void I420ToARGBRow_NEON(const uint8* y_buf, diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index 0c43d3faf..e4b62fb5b 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -19,6 +19,27 @@ namespace libyuv { +TEST_F(libyuvTest, TestVersion) { + EXPECT_GE(LIBYUV_VERSION, 169); +} + +TEST_F(libyuvTest, TestCpuHas) { +#if LIBYUV_VERSION >= 236 + int has_x86 = TestCpuFlag(kCpuHasX86); + printf("Has X86 %d\n", has_x86); +#endif + int has_sse2 = TestCpuFlag(kCpuHasSSE2); + printf("Has SSE2 %d\n", has_sse2); + int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); + printf("Has SSSE3 %d\n", has_ssse3); +#if LIBYUV_VERSION >= 236 + int has_sse41 = TestCpuFlag(kCpuHasSSE41); + printf("Has SSE4.1 %d\n", has_sse41); +#endif + int has_neon = TestCpuFlag(kCpuHasNEON); + printf("Has NEON %d\n", has_neon); +} + // For testing purposes call the proc/cpuinfo parser directly extern "C" int ArmCpuCaps(const char* cpuinfoname); @@ -27,8 +48,4 @@ TEST_F(libyuvTest, TestLinuxNeon) { EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("unit_test/testdata/tegra3.txt")); } -TEST_F(libyuvTest, TestVersion) { - EXPECT_GE(LIBYUV_VERSION, 169); -} - } // namespace libyuv