diff --git a/README.chromium b/README.chromium index c56327178..c74afb92d 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 217 +Version: 218 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index e99ad6491..f5cb202de 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 217 +#define LIBYUV_VERSION 218 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/compare.cc b/source/compare.cc index c57a59162..1da3dc4ab 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -39,8 +39,8 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #define HAS_SUMSQUAREERROR_NEON -static uint32 SumSquareError_NEON(const uint8* src_a, - const uint8* src_b, int count) { +static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, + int count) { volatile uint32 sse; asm volatile ( "vmov.u8 q7, #0 \n" @@ -79,8 +79,8 @@ static uint32 SumSquareError_NEON(const uint8* src_a, #elif defined(_M_IX86) && !defined(YUV_DISABLE_ASM) #define HAS_SUMSQUAREERROR_SSE2 __declspec(naked) -static uint32 SumSquareError_SSE2(const uint8* src_a, - const uint8* src_b, int count) { +static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, + int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b @@ -119,8 +119,8 @@ static uint32 SumSquareError_SSE2(const uint8* src_a, #elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) #define HAS_SUMSQUAREERROR_SSE2 -static uint32 SumSquareError_SSE2(const uint8* src_a, - const uint8* src_b, int count) { +static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, + int count) { uint32 sse; asm volatile ( "pxor %%xmm0,%%xmm0 \n" @@ -165,8 +165,8 @@ static uint32 
SumSquareError_SSE2(const uint8* src_a, } #endif -static uint32 SumSquareError_C(const uint8* src_a, - const uint8* src_b, int count) { +static uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, + int count) { uint32 sse = 0u; for (int x = 0; x < count; ++x) { int diff = src_a[0] - src_b[0]; @@ -177,23 +177,20 @@ static uint32 SumSquareError_C(const uint8* src_a, return sse; } -uint64 ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, int count) { - uint32 (*SumSquareError)(const uint8* src_a, - const uint8* src_b, int count); +uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, + int count) { + uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = + SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) if (TestCpuFlag(kCpuHasNEON)) { SumSquareError = SumSquareError_NEON; - } else + } #elif defined(HAS_SUMSQUAREERROR_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) { SumSquareError = SumSquareError_SSE2; - } else -#endif - { - SumSquareError = SumSquareError_C; } +#endif // 32K values will fit a 32bit int return value from SumSquareError. // After each block of 32K, accumulate into 64 bit int. 
const int kBlockSize = 1 << 15; // 32768; @@ -222,17 +219,20 @@ uint64 ComputeSumSquareError(const uint8* src_a, uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, const uint8* src_b, int stride_b, int width, int height) { - uint32 (*SumSquareError)(const uint8* src_a, - const uint8* src_b, int count); + uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = + SumSquareError_C; + // Called per row: SIMD needs width (and SSE2 each row start) 16 aligned. #if defined(HAS_SUMSQUAREERROR_NEON) - if (TestCpuFlag(kCpuHasNEON) && - IS_ALIGNED(width, 16)) { + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { SumSquareError = SumSquareError_NEON; - } else -#endif - { - SumSquareError = SumSquareError_C; } +#elif defined(HAS_SUMSQUAREERROR_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && + IS_ALIGNED(src_a, 16) && IS_ALIGNED(stride_a, 16) && + IS_ALIGNED(src_b, 16) && IS_ALIGNED(stride_b, 16)) { + SumSquareError = SumSquareError_SSE2; + } +#endif uint64 sse = 0; for (int h = 0; h < height; ++h) { diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc index 4430b0e06..52730e249 100644 --- a/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc @@ -152,6 +152,9 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) { src_b, _benchmark_width, _benchmark_width, _benchmark_height); + c_time = (get_time() - c_time) / _benchmark_iterations; + printf ("BenchmarkPsnr_C - %8d us c\n", (int)(c_time*1e6)); + MaskCpuFlags(-1); EXPECT_EQ(0, 0); @@ -164,15 +167,16 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) { align_buffer_16(src_a, _benchmark_width * _benchmark_height) align_buffer_16(src_b, _benchmark_width * _benchmark_height) - MaskCpuFlags(kCpuInitialized); + MaskCpuFlags(-1); - double c_time = get_time(); + double opt_time = get_time(); for (int i = 0; i < _benchmark_iterations; ++i) CalcFramePsnr(src_a, _benchmark_width, src_b, _benchmark_width, _benchmark_width, _benchmark_height); - MaskCpuFlags(-1); + opt_time = (get_time() - opt_time) / _benchmark_iterations; + printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6)); EXPECT_EQ(0, 0); @@ -269,6 +273,9 @@ TEST_F(libyuvTest, BenchmarkSsim_C) { 
src_b, _benchmark_width, _benchmark_width, _benchmark_height); + c_time = (get_time() - c_time) / _benchmark_iterations; + printf ("BenchmarkSsim_C - %8d us c\n", (int)(c_time*1e6)); + MaskCpuFlags(-1); EXPECT_EQ(0, 0); @@ -281,15 +288,16 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) { align_buffer_16(src_a, _benchmark_width * _benchmark_height) align_buffer_16(src_b, _benchmark_width * _benchmark_height) - MaskCpuFlags(kCpuInitialized); + MaskCpuFlags(-1); - double c_time = get_time(); + double opt_time = get_time(); for (int i = 0; i < _benchmark_iterations; ++i) CalcFrameSsim(src_a, _benchmark_width, src_b, _benchmark_width, _benchmark_width, _benchmark_height); - MaskCpuFlags(-1); + opt_time = (get_time() - opt_time) / _benchmark_iterations; + printf ("BenchmarkSsim_OPT - %8d us opt\n", (int)(opt_time*1e6)); EXPECT_EQ(0, 0); diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 46ab385e6..2e0135e98 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -95,7 +95,6 @@ TEST_F (libyuvTest, I420To##FMT##_CvsOPT) { \ dst_rgb_opt, src_width << 2, \ src_width, src_height); \ int err = 0; \ - int i = 0; \ for (int i = 0; i < src_height; ++i) { \ for (int j = 0; j < src_width << 2; ++j) { \ int diff = (int)(dst_rgb_c[i * src_height + j]) - \