mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
Use vmovd to avoid switch to sse mode
BUG=none TEST=c:\intelsde\sde -hsw -- out\release\libyuv_unittest.exe --gtest_filter=*Psnr* Review URL: https://webrtc-codereview.appspot.com/1097013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@573 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f3ad618d40
commit
408e574366
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 572
|
||||
Version: 573
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 572
|
||||
#define LIBYUV_VERSION 573
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -102,7 +102,9 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SUMSQUAREERROR_AVX2)
|
||||
bool clear = false;
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
clear = true;
|
||||
// Note only used for multiples of 32 so count is not checked.
|
||||
SumSquareError = SumSquareError_AVX2;
|
||||
}
|
||||
@ -130,6 +132,12 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
if (remainder) {
|
||||
sse += SumSquareError_C(src_a, src_b, remainder);
|
||||
}
|
||||
|
||||
#if defined(HAS_SUMSQUAREERROR_AVX2)
|
||||
if (clear) {
|
||||
__asm vzeroupper;
|
||||
}
|
||||
#endif
|
||||
return sse;
|
||||
}
|
||||
|
||||
@ -157,7 +165,9 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SUMSQUAREERROR_AVX2)
|
||||
bool clear = false;
|
||||
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
|
||||
clear = true;
|
||||
SumSquareError = SumSquareError_AVX2;
|
||||
}
|
||||
#endif
|
||||
@ -168,6 +178,11 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
src_b += stride_b;
|
||||
}
|
||||
|
||||
#if defined(HAS_SUMSQUAREERROR_AVX2)
|
||||
if (clear) {
|
||||
__asm vzeroupper;
|
||||
}
|
||||
#endif
|
||||
return sse;
|
||||
}
|
||||
|
||||
|
||||
@ -47,9 +47,9 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
paddd xmm0, xmm2
|
||||
jg wloop
|
||||
|
||||
pshufd xmm1, xmm0, 0EEh
|
||||
pshufd xmm1, xmm0, 0xee
|
||||
paddd xmm0, xmm1
|
||||
pshufd xmm1, xmm0, 01h
|
||||
pshufd xmm1, xmm0, 0x01
|
||||
paddd xmm0, xmm1
|
||||
movd eax, xmm0
|
||||
ret
|
||||
@ -67,7 +67,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
vpxor ymm0, ymm0, ymm0 // sum
|
||||
vpxor ymm5, ymm5, ymm5 // for unpack.
|
||||
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
|
||||
sub edx, eax
|
||||
|
||||
align 16
|
||||
@ -92,9 +92,8 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpermq ymm1, ymm0, 0x02 // high + low lane.
|
||||
vpaddd ymm4, ymm0, ymm1
|
||||
vzeroupper // TODO(fbarchard): Remove.
|
||||
movd eax, xmm4
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vmovd eax, xmm0
|
||||
ret
|
||||
}
|
||||
}
|
||||
@ -173,14 +172,14 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
sub ecx, 16
|
||||
paddd xmm1, xmm3
|
||||
|
||||
pshufd xmm2, xmm1, 14 // upper 2 dwords
|
||||
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||
paddd xmm1, xmm2
|
||||
pshufd xmm2, xmm1, 1
|
||||
pshufd xmm2, xmm1, 0x01
|
||||
paddd xmm1, xmm2
|
||||
paddd xmm0, xmm1
|
||||
jg wloop
|
||||
|
||||
movd eax, xmm0 // return hash
|
||||
movd eax, xmm0 // return hash
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user