mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Port HammingDistance to SSSE3
Bug: libyuv:701
Test: BenchmarkHammingDistance_Opt
Change-Id: Ibdd5d382677ebef4f82a62e0d5c3b88614a3b6e4
Reviewed-on: https://chromium-review.googlesource.com/696290
Commit-Queue: Frank Barchard <fbarchard@google.com>
Reviewed-by: Cheng Wang <wangcheng@google.com>
This commit is contained in:
parent
bde789b176
commit
fecd741794
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1671
|
Version: 1672
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -70,7 +70,13 @@ extern "C" {
|
|||||||
#define HAS_SUMSQUAREERROR_AVX2
|
#define HAS_SUMSQUAREERROR_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available for VGCC and clangcl 64 bit:
|
// The following are available for GCC and clangcl 64 bit:
|
||||||
|
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||||
|
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||||
|
#define HAS_HAMMINGDISTANCE_SSSE3
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The following are available for GCC and clangcl 64 bit:
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
|
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
|
||||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||||
#define HAS_HAMMINGDISTANCE_AVX2
|
#define HAS_HAMMINGDISTANCE_AVX2
|
||||||
@ -93,7 +99,7 @@ extern "C" {
|
|||||||
|
|
||||||
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
|
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
|
||||||
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count);
|
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count);
|
||||||
uint32 HammingDistance_SSE2(const uint8* src_a, const uint8* src_b, int count);
|
uint32 HammingDistance_SSSE3(const uint8* src_a, const uint8* src_b, int count);
|
||||||
uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count);
|
uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count);
|
||||||
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
|
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
|
||||||
uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count);
|
uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1671
|
#define LIBYUV_VERSION 1672
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -131,6 +131,11 @@ uint64 ComputeHammingDistance(const uint8* src_a,
|
|||||||
HammingDistance = HammingDistance_X86;
|
HammingDistance = HammingDistance_X86;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_HAMMINGDISTANCE_SSSE3)
|
||||||
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
|
HammingDistance = HammingDistance_SSSE3;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_HAMMINGDISTANCE_AVX2)
|
#if defined(HAS_HAMMINGDISTANCE_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
HammingDistance = HammingDistance_AVX2;
|
HammingDistance = HammingDistance_AVX2;
|
||||||
|
|||||||
@ -35,15 +35,74 @@ uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
|
|||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_HAMMINGDISTANCE_AVX2
|
static vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
|
||||||
static uint32 kNibbleMask = 0x0f0f0f0fu;
|
15, 15, 15, 15, 15, 15, 15, 15};
|
||||||
static vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
|
static vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
|
||||||
|
|
||||||
|
uint32 HammingDistance_SSSE3(const uint8* src_a,
|
||||||
|
const uint8* src_b,
|
||||||
|
int count) {
|
||||||
|
uint32 diff = 0u;
|
||||||
|
|
||||||
|
asm volatile(
|
||||||
|
"movdqa %4,%%xmm2 \n"
|
||||||
|
"movdqa %5,%%xmm3 \n"
|
||||||
|
"pxor %%xmm0,%%xmm0 \n"
|
||||||
|
"pxor %%xmm1,%%xmm1 \n"
|
||||||
|
"sub %0,%1 \n"
|
||||||
|
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"movdqa (%0),%%xmm4 \n"
|
||||||
|
"movdqa 0x10(%0), %%xmm5 \n"
|
||||||
|
"pxor (%0,%1), %%xmm4 \n"
|
||||||
|
"movdqa %%xmm4,%%xmm6 \n"
|
||||||
|
"pand %%xmm2,%%xmm6 \n"
|
||||||
|
"psrlw $0x4,%%xmm4 \n"
|
||||||
|
"movdqa %%xmm3,%%xmm7 \n"
|
||||||
|
"pshufb %%xmm6,%%xmm7 \n"
|
||||||
|
"pand %%xmm2,%%xmm4 \n"
|
||||||
|
"movdqa %%xmm3,%%xmm6 \n"
|
||||||
|
"pshufb %%xmm4,%%xmm6 \n"
|
||||||
|
"paddb %%xmm7,%%xmm6 \n"
|
||||||
|
"pxor 0x10(%0,%1),%%xmm5 \n"
|
||||||
|
"add $0x20,%0 \n"
|
||||||
|
"movdqa %%xmm5,%%xmm4 \n"
|
||||||
|
"pand %%xmm2,%%xmm5 \n"
|
||||||
|
"psrlw $0x4,%%xmm4 \n"
|
||||||
|
"movdqa %%xmm3,%%xmm7 \n"
|
||||||
|
"pshufb %%xmm5,%%xmm7 \n"
|
||||||
|
"pand %%xmm2,%%xmm4 \n"
|
||||||
|
"movdqa %%xmm3,%%xmm5 \n"
|
||||||
|
"pshufb %%xmm4,%%xmm5 \n"
|
||||||
|
"paddb %%xmm7,%%xmm5 \n"
|
||||||
|
"paddb %%xmm5,%%xmm6 \n"
|
||||||
|
"psadbw %%xmm1,%%xmm6 \n"
|
||||||
|
"paddd %%xmm6,%%xmm0 \n"
|
||||||
|
"sub $0x20,%2 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
|
||||||
|
"pshufd $0xaa,%%xmm0,%%xmm1 \n"
|
||||||
|
"paddd %%xmm1,%%xmm0 \n"
|
||||||
|
"vmovd %%xmm0, %3 \n"
|
||||||
|
: "+r"(src_a), // %0
|
||||||
|
"+r"(src_b), // %1
|
||||||
|
"+r"(count), // %2
|
||||||
|
"=r"(diff) // %3
|
||||||
|
: "m"(kNibbleMask), // %4
|
||||||
|
"m"(kBitCount) // %5
|
||||||
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
|
"xmm7");
|
||||||
|
|
||||||
|
return diff;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAS_HAMMINGDISTANCE_AVX2
|
||||||
uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||||
uint32 diff = 0u;
|
uint32 diff = 0u;
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vbroadcastss %4,%%ymm2 \n"
|
"vbroadcastf128 %4,%%ymm2 \n"
|
||||||
"vbroadcastf128 %5,%%ymm3 \n"
|
"vbroadcastf128 %5,%%ymm3 \n"
|
||||||
"vpxor %%ymm0,%%ymm0,%%ymm0 \n"
|
"vpxor %%ymm0,%%ymm0,%%ymm0 \n"
|
||||||
"vpxor %%ymm1,%%ymm1,%%ymm1 \n"
|
"vpxor %%ymm1,%%ymm1,%%ymm1 \n"
|
||||||
@ -83,7 +142,7 @@ uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
|||||||
: "+r"(src_a), // %0
|
: "+r"(src_a), // %0
|
||||||
"+r"(src_b), // %1
|
"+r"(src_b), // %1
|
||||||
"+r"(count), // %2
|
"+r"(count), // %2
|
||||||
"=g"(diff) // %3
|
"=r"(diff) // %3
|
||||||
: "m"(kNibbleMask), // %4
|
: "m"(kNibbleMask), // %4
|
||||||
"m"(kBitCount) // %5
|
"m"(kBitCount) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||||
|
|||||||
@ -233,9 +233,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
|
|||||||
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
||||||
if (has_avx2) {
|
if (has_avx2) {
|
||||||
h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth);
|
h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth);
|
||||||
|
} else {
|
||||||
|
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||||
|
if (has_ssse3) {
|
||||||
|
h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth);
|
||||||
} else {
|
} else {
|
||||||
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
|
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#elif defined(HAS_HAMMINGDISTANCE_X86)
|
#elif defined(HAS_HAMMINGDISTANCE_X86)
|
||||||
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
|
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
|
||||||
#else
|
#else
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user