mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Hamming code difference of 2 memory blocks
BUG=libyuv:701 TEST=built and disassembled for aarch64 R=kjellander@chromium.org Change-Id: I7712b1c7934e5dfb55fda1fa7c8405c32d6964ce Reviewed-on: https://chromium-review.googlesource.com/495327 Reviewed-by: Henrik Kjellander <kjellander@chromium.org> Reviewed-by: Cheng Wang <wangcheng@google.com>
This commit is contained in:
parent
945ea1b746
commit
2136e349da
5
BUILD.gn
5
BUILD.gn
@ -33,10 +33,10 @@ group("default") {
|
||||
if (libyuv_include_tests) {
|
||||
deps += [
|
||||
":compare",
|
||||
":yuvconvert",
|
||||
":cpuid",
|
||||
":libyuv_unittest",
|
||||
":psnr",
|
||||
":yuvconvert",
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -158,7 +158,8 @@ static_library("libyuv_internal") {
|
||||
}
|
||||
|
||||
# To enable AVX2 or other cpu optimization, pass flag here
|
||||
# cflags = [ "-mavx2" ]
|
||||
# cflags = [ "-mavx2" ]
|
||||
# cflags = [ "-mpopcnt" ]
|
||||
}
|
||||
|
||||
if (libyuv_use_neon) {
|
||||
|
||||
@ -67,6 +67,10 @@ extern "C" {
|
||||
#define HAS_SUMSQUAREERROR_NEON
|
||||
#endif
|
||||
|
||||
// Hamming distance kernels: each returns the number of bit positions that
// differ between the buffers src_a and src_b, compared over count bytes.
// _C is the portable version. _X86 is only defined for x86-64, or non-MSVC
// i386, builds without LIBYUV_DISABLE_X86. _NEON is only defined for AArch64
// builds without LIBYUV_DISABLE_NEON, and its loop consumes 32 bytes per pass.
// NOTE(review): confirm any length-multiple requirement against each
// implementation before passing arbitrary counts.
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
|
||||
|
||||
@ -17,6 +17,41 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if ORIGINAL_C
|
||||
// Reference Hamming distance: walks the buffers one byte at a time and counts
// the bit positions in which they differ.
//   src_a, src_b: buffers holding at least count readable bytes each.
//   count:        number of bytes to compare; a value <= 0 yields 0.
// Returns the total number of differing bits.
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
  // Plain accumulator: nothing outside this function observes it, so it does
  // not need to be volatile (which forced a memory update per counted bit).
  uint32 diff = 0u;
  int i;
  for (i = 0; i < count; ++i) {
    // The set bits of x are exactly the positions where the two bytes differ.
    unsigned int x = (unsigned int)(src_a[i] ^ src_b[i]);
    // x &= x - 1 clears the lowest set bit, so this runs once per set bit.
    while (x != 0u) {
      x &= x - 1u;
      ++diff;
    }
  }
  return diff;
}
|
||||
#endif
|
||||
|
||||
// Hamming distance using the HAKMEM (item 169) population count.
// Differences are counted 32 bits at a time; the 1..3 bytes left over when
// count is not a multiple of 4 are counted individually so no input is
// skipped.
//   src_a, src_b: buffers holding at least count readable bytes each; no
//                 alignment is required.
//   count:        number of bytes to compare; a value <= 0 yields 0.
// Returns the number of bit positions in which the two buffers differ.
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff = 0u;
  int i;
  for (i = 0; count - i >= 4; i += 4) {
    // Assemble the XOR of four byte pairs into one word. Reading bytes
    // (instead of casting to uint32*) avoids misaligned and aliasing-unsafe
    // loads; the byte order is irrelevant to a bit count.
    uint32 x = (uint32)(src_a[0] ^ src_b[0]) |
               ((uint32)(src_a[1] ^ src_b[1]) << 8) |
               ((uint32)(src_a[2] ^ src_b[2]) << 16) |
               ((uint32)(src_a[3] ^ src_b[3]) << 24);
    uint32 u;
    src_a += 4;
    src_b += 4;
    // Each 3-bit (octal digit) field of u now holds the number of set bits
    // that field had in x.
    u = x - ((x >> 1) & 033333333333) - ((x >> 2) & 011111111111);
    // Fold neighbouring 3-bit counts into 6-bit fields, then add the fields
    // with a single modulo: 64 == 1 (mod 63) and the total is at most 32.
    diff += ((u + (u >> 3)) & 030707070707) % 63;
  }
  // Tail: fewer than four bytes remain.
  for (; i < count; ++i) {
    unsigned int x = (unsigned int)(*src_a++ ^ *src_b++);
    // Clears the lowest set bit each pass, i.e. one pass per differing bit.
    while (x != 0u) {
      x &= x - 1u;
      ++diff;
    }
  }
  return diff;
}
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32 sse = 0u;
|
||||
int i;
|
||||
|
||||
@ -22,6 +22,19 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
|
||||
// Hamming distance for GCC-compatible x86 builds, counting differences 64
// bits at a time with the compiler's population-count builtin. The 1..7 bytes
// left over when count is not a multiple of 8 are counted per byte so no
// input is skipped.
//   src_a, src_b: buffers holding at least count readable bytes each; no
//                 alignment is required.
//   count:        number of bytes to compare; a value <= 0 yields 0.
// Returns the number of bit positions in which the two buffers differ.
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff = 0u;
  int i;
  for (i = 0; count - i >= 8; i += 8) {
    uint64 a;
    uint64 b;
    // Copy into locals rather than dereferencing a casted uint64*: this is
    // well defined for any alignment and compiles to a plain 8-byte load.
    __builtin_memcpy(&a, src_a + i, sizeof(a));
    __builtin_memcpy(&b, src_b + i, sizeof(b));
    diff += (uint32)__builtin_popcountll(a ^ b);
  }
  // Tail: fewer than eight bytes remain.
  for (; i < count; ++i) {
    diff += (uint32)__builtin_popcount((unsigned int)(src_a[i] ^ src_b[i]));
  }
  return diff;
}
|
||||
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32 sse;
|
||||
asm volatile (
|
||||
|
||||
@ -20,6 +20,67 @@ extern "C" {
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
#if 0
|
||||
// Earlier AArch64 NEON Hamming distance, handling 16 bytes per loop pass.
// This copy sits inside an "#if 0" region, so it is not compiled.
// The loop body always runs at least once and subtracts 16 from count per
// pass; NOTE(review): presumably count must be a positive multiple of 16 -
// confirm with callers.
// Returns the low 32 bits of the accumulated count of differing bits.
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
volatile uint32 diff;
|
||||
asm volatile (
|
||||
// XOR a register with itself: zero v4 (per-pass sum) and v5 (running total).
"eor v4.16b, v4.16b, v4.16b \n"
|
||||
"eor v5.16b, v5.16b, v5.16b \n"
|
||||
|
||||
"1: \n"
|
||||
// MEMACCESS is a macro defined outside this excerpt.
// NOTE(review): presumably it annotates the memory operand index - confirm.
MEMACCESS(0)
|
||||
// Load 16 bytes from src_a and advance the pointer by 16.
"ld1 {v0.16b}, [%0], #16 \n"
|
||||
MEMACCESS(1)
|
||||
// Load 16 bytes from src_b and advance the pointer by 16.
"ld1 {v1.16b}, [%1], #16 \n"
|
||||
// count -= 16; sets the flags tested by b.gt below.
"subs %w2, %w2, #16 \n"
|
||||
// Differing bits of the two 16-byte groups.
"eor v2.16b, v0.16b, v1.16b \n"
|
||||
// Per-byte population count (each lane becomes 0..8).
"cnt v3.16b, v2.16b \n"
|
||||
// Sum the 16 lanes into b4; at most 16 * 8 = 128, so it fits in a byte.
"addv b4, v3.16b \n"
|
||||
// Add this pass's sum to the running total in d5.
"add d5, d5, d4 \n"
|
||||
// Loop while count is still greater than zero.
"b.gt 1b \n"
|
||||
|
||||
// Move the low 32 bits of the total into diff.
"fmov %w3, s5 \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(diff)
|
||||
:
|
||||
// NOTE(review): the asm reads through %0/%1 but declares no "memory"
// clobber or memory inputs - confirm this is acceptable for callers.
: "cc", "v0", "v1", "v2", "v3", "v4", "v5");
|
||||
return diff;
|
||||
}
|
||||
#endif
|
||||
|
||||
// AArch64 NEON Hamming distance, handling 32 bytes of each buffer per loop
// pass as two independent 16-byte halves.
// The loop body always runs at least once and subtracts 32 from count per
// pass; NOTE(review): presumably count must be a positive multiple of 32 -
// confirm with callers.
// Returns the low 32 bits of the accumulated count of differing bits.
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
volatile uint32 diff;
|
||||
asm volatile (
|
||||
// Zero the running total kept in d6.
"movi d6, #0 \n"
|
||||
|
||||
"1: \n"
|
||||
// MEMACCESS is a macro defined outside this excerpt.
// NOTE(review): presumably it annotates the memory operand index - confirm.
MEMACCESS(0)
|
||||
// Load 32 bytes from src_a into v0/v1 and advance the pointer by 32.
"ld1 {v0.16b, v1.16b}, [%0], #32 \n"
|
||||
MEMACCESS(1)
|
||||
// Load 32 bytes from src_b into v2/v3 and advance the pointer by 32.
"ld1 {v2.16b, v3.16b}, [%1], #32 \n"
|
||||
// count -= 32; sets the flags tested by b.gt below.
"subs %w2, %w2, #32 \n"
|
||||
// Differing bits of each 16-byte half.
"eor v0.16b, v0.16b, v2.16b \n"
|
||||
"eor v1.16b, v1.16b, v3.16b \n"
|
||||
// Per-byte population count (each lane becomes 0..8).
"cnt v0.16b, v0.16b \n"
|
||||
"cnt v1.16b, v1.16b \n"
|
||||
// Sum the 16 lanes of each half; at most 16 * 8 = 128, so each fits in a byte.
"addv b4, v0.16b \n"
|
||||
"addv b5, v1.16b \n"
|
||||
// Add both half sums to the running total in d6.
"add d6, d6, d4 \n"
|
||||
"add d6, d6, d5 \n"
|
||||
// Loop while count is still greater than zero.
"b.gt 1b \n"
|
||||
|
||||
// Move the low 32 bits of the total into diff.
"fmov %w3, s6 \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(diff)
|
||||
:
|
||||
// NOTE(review): the asm reads through %0/%1 but declares no "memory"
// clobber or memory inputs - confirm this is acceptable for callers.
: "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
|
||||
return diff;
|
||||
}
|
||||
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
volatile uint32 sse;
|
||||
asm volatile (
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/compare.h"
|
||||
#include "libyuv/compare_row.h" /* For HammingDistance_C */
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
@ -202,6 +203,78 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) {
|
||||
|
||||
free_aligned_buffer_page_end(src_a);
|
||||
}
|
||||
|
||||
// Checks the platform-selected Hamming distance kernel against the C version
// and calls it repeatedly so the run can be timed.
TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_Opt) {
  const int kMaxWidth = 4096 * 3;
  align_buffer_page_end(src_a, kMaxWidth);
  align_buffer_page_end(src_b, kMaxWidth);
  memset(src_a, 0, kMaxWidth);
  memset(src_b, 0, kMaxWidth);

  // Known answer: these two 16-byte strings differ in exactly 16 bits.
  memcpy(src_a, "test0123test4567", 16);
  memcpy(src_b, "tick0123tock4567", 16);
  uint32 measured = HammingDistance_C(src_a, src_b, 16);
  EXPECT_EQ(16u, measured);

  // Random data: the C kernel provides the expected value.
  MemRandomize(src_a, kMaxWidth);
  MemRandomize(src_b, kMaxWidth);
  uint32 expected = HammingDistance_C(src_a, src_b, kMaxWidth);

  // Number of kMaxWidth-sized passes needed to cover the benchmark frame,
  // rounded up, times the requested iteration count.
  int total_passes =
      benchmark_iterations_ *
      ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth);
  for (int pass = 0; pass < total_passes; ++pass) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    measured = HammingDistance_NEON(src_a, src_b, kMaxWidth);
#elif !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
    measured = HammingDistance_X86(src_a, src_b, kMaxWidth);
#else
    measured = HammingDistance_C(src_a, src_b, kMaxWidth);
#endif
  }

  EXPECT_EQ(expected, measured);

  free_aligned_buffer_page_end(src_a);
  free_aligned_buffer_page_end(src_b);
}
|
||||
|
||||
// Times the C Hamming distance kernel and checks that repeated calls keep
// returning the same value.
TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_C) {
  const int kMaxWidth = 4096 * 3;
  align_buffer_page_end(src_a, kMaxWidth);
  align_buffer_page_end(src_b, kMaxWidth);
  memset(src_a, 0, kMaxWidth);
  memset(src_b, 0, kMaxWidth);

  // Known answer: these two 16-byte strings differ in exactly 16 bits.
  memcpy(src_a, "test0123test4567", 16);
  memcpy(src_b, "tick0123tock4567", 16);
  uint32 measured = HammingDistance_C(src_a, src_b, 16);
  EXPECT_EQ(16u, measured);

  // Random data: the first C result is the value every later call must match.
  MemRandomize(src_a, kMaxWidth);
  MemRandomize(src_b, kMaxWidth);
  uint32 expected = HammingDistance_C(src_a, src_b, kMaxWidth);

  // Number of kMaxWidth-sized passes needed to cover the benchmark frame,
  // rounded up, times the requested iteration count.
  int total_passes =
      benchmark_iterations_ *
      ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth);
  for (int pass = 0; pass < total_passes; ++pass) {
    measured = HammingDistance_C(src_a, src_b, kMaxWidth);
  }

  EXPECT_EQ(expected, measured);

  free_aligned_buffer_page_end(src_a);
  free_aligned_buffer_page_end(src_b);
}
|
||||
|
||||
TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) {
|
||||
const int kMaxWidth = 4096 * 3;
|
||||
align_buffer_page_end(src_a, kMaxWidth);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user