From 167d5d1c2fc10e0524f5fed0ecd42462aaeb942c Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 8 Jan 2014 00:59:40 +0000 Subject: [PATCH] Porting parts of compare to c89 BUG=303 TESTED=try bots still build, gcc and vc direct for c testing. R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/6739004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@956 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/compare.cc | 24 +++++++++++++----------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/README.chromium b/README.chromium index cf282983c..05bec7f5b 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 955 +Version: 956 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 3a19a559b..a933385c4 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 955 +#define LIBYUV_VERSION 956 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/compare.cc b/source/compare.cc index 70018974b..c9010864c 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -46,6 +46,8 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); // hash seed of 5381 recommended. LIBYUV_API uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { + const int kBlockSize = 1 << 15; // 32768; + int remainder = (int)(count) & ~15; uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; #if defined(HAS_HASHDJB2_SSE41) if (TestCpuFlag(kCpuHasSSE41)) { @@ -58,13 +60,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { } #endif - const int kBlockSize = 1 << 15; // 32768; while (count >= (uint64)(kBlockSize)) { seed = HashDjb2_SSE(src, kBlockSize, seed); src += kBlockSize; count -= kBlockSize; } - int remainder = (int)(count) & ~15; if (remainder) { seed = HashDjb2_SSE(src, remainder, seed); src += remainder; @@ -98,6 +98,13 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); LIBYUV_API uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, int count) { + // SumSquareError returns values 0 to 65535 for each squared difference. + // Up to 65536 of those can be summed and remain within a uint32. + // After each block of 65536 pixels, accumulate into a uint64. + const int kBlockSize = 65536; + int remainder = count & (kBlockSize - 1) & ~31; + uint64 sse = 0; + int i; uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) @@ -118,20 +125,14 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, SumSquareError = SumSquareError_AVX2; } #endif - // SumSquareError returns values 0 to 65535 for each squared difference. - // Up to 65536 of those can be summed and remain within a uint32. - // After each block of 65536 pixels, accumulate into a uint64. - const int kBlockSize = 65536; - uint64 sse = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+: sse) #endif - for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { + for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { sse += SumSquareError(src_a + i, src_b + i, kBlockSize); } src_a += count & ~(kBlockSize - 1); src_b += count & ~(kBlockSize - 1); - int remainder = count & (kBlockSize - 1) & ~31; if (remainder) { sse += SumSquareError(src_a, src_b, remainder); src_a += remainder; @@ -148,6 +149,8 @@ LIBYUV_API uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, const uint8* src_b, int stride_b, int width, int height) { + uint64 sse = 0; + int h; // Coalesce rows. if (stride_a == width && stride_b == width) { @@ -155,8 +158,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, height = 1; stride_a = stride_b = 0; } - uint64 sse = 0; - for (int h = 0; h < height; ++h) { + for (h = 0; h < height; ++h) { sse += ComputeSumSquareError(src_a, src_b, width); src_a += stride_a; src_b += stride_b;