mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Revert "ComputeHammingDistance reduce SIMD loop to 1 call when possible."
This reverts commit ec75df5894845b8d6b1341885a78db1de83decd8.

Reason for revert: <INSERT REASONING HERE>

Original change's description:
> ComputeHammingDistance reduce SIMD loop to 1 call when possible.
>
> 32 bit x86 has high overhead due to -fpic. So this reduces the
> number of calls by 1.
>
> TBR=kjellander@chromium.org
> Bug: libyuv:701
> Test: BenchmarkHammingDistance
> Change-Id: I7f557ef047920db65eab362a5f93abbd274ca051
> Reviewed-on: https://chromium-review.googlesource.com/701755
> Reviewed-by: Frank Barchard <fbarchard@google.com>
> Reviewed-by: Cheng Wang <wangcheng@google.com>

TBR=rrwinterton@gmail.com,fbarchard@google.com,wangcheng@google.com

Change-Id: Ia61e8558a8f083c14be5f51e0e141550b6f2b5c1
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: libyuv:701
Reviewed-on: https://chromium-review.googlesource.com/707823
Reviewed-by: Frank Barchard <fbarchard@google.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
parent
ec75df5894
commit
60f433fbd9
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1674
|
||||
Version: 1673
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1674
|
||||
#define LIBYUV_VERSION 1673
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -114,9 +114,10 @@ LIBYUV_API
|
||||
uint64 ComputeHammingDistance(const uint8* src_a,
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
const int kBlockSize = (65536 - 64); // Max count that SIMD wont overflow
|
||||
const int kBlockSize = 65536;
|
||||
const int kSimdSize = 64;
|
||||
int remainder;
|
||||
// SIMD for multiple of 64, and C for remainder
|
||||
int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
|
||||
uint64 diff = 0;
|
||||
int i;
|
||||
uint32 (*HammingDistance)(const uint8* src_a, const uint8* src_b, int count) =
|
||||
@ -152,14 +153,16 @@ uint64 ComputeHammingDistance(const uint8* src_a,
|
||||
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
diff += HammingDistance(src_a + i, src_b + i, kBlockSize);
|
||||
}
|
||||
remainder = (count - i) & ~(kSimdSize - 1);
|
||||
src_a += count & ~(kBlockSize - 1);
|
||||
src_b += count & ~(kBlockSize - 1);
|
||||
if (remainder) {
|
||||
diff += HammingDistance(src_a + i, src_b + i, remainder);
|
||||
i += remainder;
|
||||
diff += HammingDistance(src_a, src_b, remainder);
|
||||
src_a += remainder;
|
||||
src_b += remainder;
|
||||
}
|
||||
remainder = (count - i);
|
||||
remainder = count & (kSimdSize - 1);
|
||||
if (remainder) {
|
||||
diff += HammingDistance_C(src_a + i, src_b + i, remainder);
|
||||
diff += HammingDistance_C(src_a, src_b, remainder);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
@ -333,67 +333,6 @@ TEST_F(LibYUVCompareTest, TestHammingDistance) {
|
||||
free_aligned_buffer_page_end(src_b);
|
||||
}
|
||||
|
||||
// Tests low levels match reference C for specified size.
|
||||
// The opt implementations have size limitations
|
||||
|
||||
static const int kMaxOptCount = 65536 - 64;
|
||||
TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
|
||||
uint32 h1 = 0;
|
||||
align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_);
|
||||
align_buffer_page_end(src_b, benchmark_width_ * benchmark_height_);
|
||||
memset(src_a, 255u, benchmark_width_ * benchmark_height_);
|
||||
memset(src_b, 0, benchmark_width_ * benchmark_height_);
|
||||
|
||||
uint32 h0 =
|
||||
HammingDistance_C(src_a, src_b, benchmark_width_ * benchmark_height_);
|
||||
EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h0);
|
||||
|
||||
uint32 h2 = ComputeHammingDistance(src_a, src_b,
|
||||
benchmark_width_ * benchmark_height_);
|
||||
EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h2);
|
||||
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||
#if defined(HAS_HAMMINGDISTANCE_NEON)
|
||||
h1 = HammingDistance_NEON(src_a, src_b,
|
||||
benchmark_width_ * benchmark_height_);
|
||||
#elif defined(HAS_HAMMINGDISTANCE_AVX2)
|
||||
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
||||
if (has_avx2) {
|
||||
h1 = HammingDistance_AVX2(src_a, src_b,
|
||||
benchmark_width_ * benchmark_height_);
|
||||
} else {
|
||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||
if (has_ssse3) {
|
||||
h1 = HammingDistance_SSSE3(src_a, src_b,
|
||||
benchmark_width_ * benchmark_height_);
|
||||
} else {
|
||||
h1 = HammingDistance_X86(src_a, src_b,
|
||||
benchmark_width_ * benchmark_height_);
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_HAMMINGDISTANCE_X86)
|
||||
h1 =
|
||||
HammingDistance_X86(src_a, src_b, benchmark_width_ * benchmark_height_);
|
||||
#else
|
||||
h1 = HammingDistance_C(src_a, src_b, benchmark_width_ * benchmark_height_);
|
||||
#endif
|
||||
}
|
||||
// A large count will cause the low level to potentially overflow so the
|
||||
// result can not be expected to be correct.
|
||||
// TODO(fbarchard): Consider expecting the low 16 bits to match.
|
||||
if ((benchmark_width_ * benchmark_height_) <= kMaxOptCount) {
|
||||
EXPECT_EQ(h0, h1);
|
||||
} else if (h0 != h1) {
|
||||
printf(
|
||||
"warning - HammingDistance_Opt does not match HammingDistance_C: "
|
||||
"HammingDistance_Opt %u vs HammingDistance_C %u\n",
|
||||
h1, h0);
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(src_a);
|
||||
free_aligned_buffer_page_end(src_b);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) {
|
||||
const int kMaxWidth = 4096 * 3;
|
||||
align_buffer_page_end(src_a, kMaxWidth);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user