mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
AVX2 hash using vex128 as first step.
BUG=none TEST=BenchmarkDjb2_Opt R=ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2219004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@792 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
a1ab194545
commit
823548cb3b
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 791
|
||||
Version: 792
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 791
|
||||
#define LIBYUV_VERSION 792
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -10,6 +10,8 @@
|
||||
|
||||
#include "libyuv/compare.h"
|
||||
|
||||
#include <stdio.h> // printf
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#ifdef _OPENMP
|
||||
@ -34,9 +36,13 @@ uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
|
||||
(defined(_M_IX86) || \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
|
||||
#define HAS_HASHDJB2_SSE41
|
||||
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
|
||||
|
||||
#if _MSC_VER >= 1700
|
||||
#define HAS_HASHDJB2_AVX2
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
|
||||
#endif
|
||||
|
||||
#endif // HAS_HASHDJB2_SSE41
|
||||
|
||||
// hash seed of 5381 recommended.
|
||||
@ -48,6 +54,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
HashDjb2_SSE = HashDjb2_SSE41;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_HASHDJB2_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
HashDjb2_SSE = HashDjb2_AVX2;
|
||||
}
|
||||
#endif
|
||||
|
||||
const int kBlockSize = 1 << 15; // 32768;
|
||||
while (count >= static_cast<uint64>(kBlockSize)) {
|
||||
|
||||
@ -184,6 +184,46 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
__declspec(naked) __declspec(align(16))
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
movd xmm0, [esp + 12] // seed
|
||||
movdqa xmm6, kHash16x33
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
|
||||
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
|
||||
pmulld xmm3, kHashMul0
|
||||
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
|
||||
pmulld xmm4, kHashMul1
|
||||
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
|
||||
pmulld xmm2, kHashMul2
|
||||
lea eax, [eax + 16]
|
||||
pmulld xmm1, kHashMul3
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm1, xmm2
|
||||
sub ecx, 16
|
||||
paddd xmm1, xmm3
|
||||
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||
paddd xmm1, xmm2
|
||||
pshufd xmm2, xmm1, 0x01
|
||||
paddd xmm1, xmm2
|
||||
paddd xmm0, xmm1
|
||||
jg wloop
|
||||
|
||||
movd eax, xmm0 // return hash
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // _MSC_VER >= 1700
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user