From 60f433fbd9f8f4832ee9947a582e7d2b70a5d415 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 10 Oct 2017 01:16:00 +0000 Subject: [PATCH] Revert "ComputeHammingDistance reduce SIMD loop to 1 call when possible." This reverts commit ec75df5894845b8d6b1341885a78db1de83decd8. Reason for revert: Original change's description: > ComputeHammingDistance reduce SIMD loop to 1 call when possible. > > 32 bit x86 has high overhead due to -fpic. So this reduces the > number of calls by 1. > > TBR=kjellander@chromium.org > Bug: libyuv:701 > Test: BenchmarkHammingDistance > Change-Id: I7f557ef047920db65eab362a5f93abbd274ca051 > Reviewed-on: https://chromium-review.googlesource.com/701755 > Reviewed-by: Frank Barchard > Reviewed-by: Cheng Wang TBR=rrwinterton@gmail.com,fbarchard@google.com,wangcheng@google.com Change-Id: Ia61e8558a8f083c14be5f51e0e141550b6f2b5c1 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: libyuv:701 Reviewed-on: https://chromium-review.googlesource.com/707823 Reviewed-by: Frank Barchard Commit-Queue: Frank Barchard --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/compare.cc | 17 ++++++----- unit_test/compare_test.cc | 61 --------------------------------------- 4 files changed, 12 insertions(+), 70 deletions(-) diff --git a/README.chromium b/README.chromium index 5a5abef93..2c97ef119 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1674 +Version: 1673 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 59f5a6d7f..ea589a0c0 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1674 +#define LIBYUV_VERSION 1673 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/compare.cc b/source/compare.cc index c13e103fc..986b097c0 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -114,9 +114,10 @@ LIBYUV_API uint64 ComputeHammingDistance(const uint8* src_a, const uint8* src_b, int count) { - const int kBlockSize = (65536 - 64); // Max count that SIMD wont overflow + const int kBlockSize = 65536; const int kSimdSize = 64; - int remainder; + // SIMD for multiple of 64, and C for remainder + int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1); uint64 diff = 0; int i; uint32 (*HammingDistance)(const uint8* src_a, const uint8* src_b, int count) = @@ -152,14 +153,16 @@ uint64 ComputeHammingDistance(const uint8* src_a, for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { diff += HammingDistance(src_a + i, src_b + i, kBlockSize); } - remainder = (count - i) & ~(kSimdSize - 1); + src_a += count & ~(kBlockSize - 1); + src_b += count & ~(kBlockSize - 1); if (remainder) { - diff += HammingDistance(src_a + i, src_b + i, remainder); - i += remainder; + diff += HammingDistance(src_a, src_b, remainder); + src_a += remainder; + src_b += remainder; } - remainder = (count - i); + remainder = count & (kSimdSize - 1); if (remainder) { - diff += HammingDistance_C(src_a + i, src_b + i, remainder); + diff += HammingDistance_C(src_a, src_b, remainder); } return diff; } diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc index 09ca914e0..55f5ffde4 100644 --- a/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc @@ -333,67 +333,6 @@ TEST_F(LibYUVCompareTest, TestHammingDistance) { free_aligned_buffer_page_end(src_b); } -// Tests low levels match reference C for specified size. -// The opt implementations have size limitations - -static const int kMaxOptCount = 65536 - 64; -TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { - uint32 h1 = 0; - align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_); - align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_); - memset(src_a, 255u, benchmark_width_ * benchmark_height_); - memset(src_b, 0, benchmark_width_ * benchmark_height_); - - uint32 h0 = - HammingDistance_C(src_a, src_b, benchmark_width_ * benchmark_height_); - EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h0); - - uint32 h2 = ComputeHammingDistance(src_a, src_b, - benchmark_width_ * benchmark_height_); - EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h2); - - for (int i = 0; i < benchmark_iterations_; ++i) { -#if defined(HAS_HAMMINGDISTANCE_NEON) - h1 = HammingDistance_NEON(src_a, src_b, - benchmark_width_ * benchmark_height_); -#elif defined(HAS_HAMMINGDISTANCE_AVX2) - int has_avx2 = TestCpuFlag(kCpuHasAVX2); - if (has_avx2) { - h1 = HammingDistance_AVX2(src_a, src_b, - benchmark_width_ * benchmark_height_); - } else { - int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); - if (has_ssse3) { - h1 = HammingDistance_SSSE3(src_a, src_b, - benchmark_width_ * benchmark_height_); - } else { - h1 = HammingDistance_X86(src_a, src_b, - benchmark_width_ * benchmark_height_); - } - } -#elif defined(HAS_HAMMINGDISTANCE_X86) - h1 = - HammingDistance_X86(src_a, src_b, benchmark_width_ * benchmark_height_); -#else - h1 = HammingDistance_C(src_a, src_b, benchmark_width_ * benchmark_height_); -#endif - } - // A large count will cause the low level to potentially overflow so the - // result can not be expected to be correct. - // TODO(fbarchard): Consider expecting the low 16 bits to match. - if ((benchmark_width_ * benchmark_height_) <= kMaxOptCount) { - EXPECT_EQ(h0, h1); - } else if (h0 != h1) { - printf( - "warning - HammingDistance_Opt does not match HammingDistance_C: " - "HammingDistance_Opt %u vs HammingDistance_C %u\n", - h1, h0); - } - - free_aligned_buffer_page_end(src_a); - free_aligned_buffer_page_end(src_b); -} - TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) { const int kMaxWidth = 4096 * 3; align_buffer_page_end(src_a, kMaxWidth);