Use __popcnt for Visual C HammingDistance_X86

BUG=libyuv:701
TEST=HammingDistance unittest performance is comparable to x64
R=wangcheng@google.com

Change-Id: I8abe861e086e0162ba4c7ba6f1ef7d1c006cd9d4
Reviewed-on: https://chromium-review.googlesource.com/505454
Reviewed-by: Frank Barchard <fbarchard@google.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
Frank Barchard 2017-05-12 15:57:50 -07:00
parent e0615c0e69
commit 77f6916da2
3 changed files with 19 additions and 7 deletions

View File

@ -52,10 +52,6 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
#endif
// The following are available for GCC:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_X86
#endif

View File

@ -13,6 +13,10 @@
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#if defined(_MSC_VER)
#include <intrin.h> // For __popcnt
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
@ -21,6 +25,19 @@ extern "C" {
// This module is for 32-bit Visual C x86 and clang-cl.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// Returns the number of bit positions at which src_a and src_b differ,
// computed with the __popcnt intrinsic on 32-bit words.
//
// src_a, src_b: input byte buffers; both are only read.
// count: number of bytes to compare. Only whole 4-byte groups are processed:
//        the loop stops once fewer than 4 bytes remain, so the trailing
//        (count % 4) bytes do not contribute to the result. A count below 4
//        yields 0.
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff = 0u;
  int i;
  for (i = 0; i < count - 3; i += 4) {
    // XOR leaves a 1 bit wherever the two words differ. The loads go through
    // const-qualified pointers so the inputs' constness is not cast away.
    // NOTE(review): these are 32-bit loads from byte pointers with no
    // alignment check; presumably acceptable on the x86 targets this file
    // is built for - confirm.
    uint32 x = *((const uint32*)src_a) ^ *((const uint32*)src_b);
    src_a += 4;
    src_b += 4;
    diff += __popcnt(x);
  }
  return diff;
}
__declspec(naked) uint32
SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {

View File

@ -227,10 +227,9 @@ TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_Opt) {
benchmark_iterations_ *
((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth);
for (int i = 0; i < count; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if defined(HAS_HAMMINGDISTANCE_NEON)
h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth);
#elif !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#elif defined(HAS_HAMMINGDISTANCE_X86)
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
#else
h1 = HammingDistance_C(src_a, src_b, kMaxWidth);