mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
AVX2 hash using vex128 as first step.
BUG=none TEST=BenchmarkDjb2_Opt R=ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2219004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@792 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
a1ab194545
commit
823548cb3b
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 791
|
Version: 792
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 791
|
#define LIBYUV_VERSION 792
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -10,6 +10,8 @@
|
|||||||
|
|
||||||
#include "libyuv/compare.h"
|
#include "libyuv/compare.h"
|
||||||
|
|
||||||
|
#include <stdio.h> // printf
|
||||||
|
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
@ -34,9 +36,13 @@ uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
|
|||||||
(defined(_M_IX86) || \
|
(defined(_M_IX86) || \
|
||||||
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
|
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
|
||||||
#define HAS_HASHDJB2_SSE41
|
#define HAS_HASHDJB2_SSE41
|
||||||
|
|
||||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
|
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
|
||||||
|
|
||||||
|
#if _MSC_VER >= 1700
|
||||||
|
#define HAS_HASHDJB2_AVX2
|
||||||
|
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // HAS_HASHDJB2_SSE41
|
#endif // HAS_HASHDJB2_SSE41
|
||||||
|
|
||||||
// hash seed of 5381 recommended.
|
// hash seed of 5381 recommended.
|
||||||
@ -48,6 +54,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
|||||||
HashDjb2_SSE = HashDjb2_SSE41;
|
HashDjb2_SSE = HashDjb2_SSE41;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_HASHDJB2_AVX2)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
|
HashDjb2_SSE = HashDjb2_AVX2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
const int kBlockSize = 1 << 15; // 32768;
|
const int kBlockSize = 1 << 15; // 32768;
|
||||||
while (count >= static_cast<uint64>(kBlockSize)) {
|
while (count >= static_cast<uint64>(kBlockSize)) {
|
||||||
|
|||||||
@ -184,6 +184,46 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Visual C++ 2012 (_MSC_VER 1700) or later is required to assemble AVX2 code.
|
||||||
|
#if _MSC_VER >= 1700
|
||||||
|
// Djb2 hash of a byte buffer, processing 16 source bytes per loop iteration.
// The hash is carried in lane 0 of xmm0 and returned in eax.
// Only the vpmovzxbd loads are VEX encoded; the arithmetic still uses the
// legacy SSE mnemonics (pmulld/paddd/pshufd).
//   src   - bytes to hash.
//   count - byte count; the loop body runs before count is tested and steps by
//           16, so at least 16 bytes are read and reads extend to the next
//           multiple of 16.
//           NOTE(review): callers presumably pass a positive multiple of 16 -
//           confirm against HashDjb2.
//   seed  - starting hash value.
__declspec(naked) __declspec(align(16))
|
||||||
|
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||||
|
__asm {
|
||||||
|
mov eax, [esp + 4] // eax = src (naked function: arguments are read straight off esp)
|
||||||
|
mov ecx, [esp + 8] // ecx = count, bytes remaining
|
||||||
|
movd xmm0, [esp + 12] // xmm0 lane 0 = seed, the running hash
|
||||||
|
movdqa xmm6, kHash16x33 // xmm6 = per-block hash multiplier, loaded once outside the loop
|
||||||
|
|
||||||
|
align 16 // align the loop entry to a 16 byte boundary
|
||||||
|
wloop: // one iteration consumes 16 source bytes
|
||||||
|
vpmovzxbd xmm3, dword ptr [eax] // src[0-3], bytes zero-extended to 4 dwords
|
||||||
|
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||||
|
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7], bytes zero-extended to 4 dwords
|
||||||
|
pmulld xmm3, kHashMul0 // weight src[0-3]; kHashMul0..3 are defined elsewhere (presumably powers of 33 - TODO confirm)
|
||||||
|
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11], bytes zero-extended to 4 dwords
|
||||||
|
pmulld xmm4, kHashMul1 // weight src[4-7]
|
||||||
|
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15], bytes zero-extended to 4 dwords
|
||||||
|
pmulld xmm2, kHashMul2 // weight src[8-11]
|
||||||
|
lea eax, [eax + 16] // src += 16; lea leaves the flags untouched
|
||||||
|
pmulld xmm1, kHashMul3 // weight src[12-15]
|
||||||
|
paddd xmm3, xmm4 // add 16 results
|
||||||
|
paddd xmm1, xmm2 // xmm1 = products of src[8-15]
|
||||||
|
sub ecx, 16 // count -= 16; sets the flags tested by jg at the loop end
|
||||||
|
paddd xmm1, xmm3 // xmm1 = 4 partial sums covering all 16 products
|
||||||
|
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||||
|
paddd xmm1, xmm2 // fold the upper pair onto the lower pair
|
||||||
|
pshufd xmm2, xmm1, 0x01 // bring dword 1 down to lane 0
|
||||||
|
paddd xmm1, xmm2 // lane 0 = sum of all 16 weighted bytes
|
||||||
|
paddd xmm0, xmm1 // hash += block sum (only lane 0 is returned)
|
||||||
|
jg wloop // repeat while count > 0; the SSE ops after sub do not modify the flags
|
||||||
|
|
||||||
|
movd eax, xmm0 // return hash
|
||||||
|
ret // naked function, so the return is explicit
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // _MSC_VER >= 1700
|
||||||
|
|
||||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user