mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
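Bump reciprocal up by 1
The 0.16 fixed-point reciprocal used for small areas is now rounded up: it is computed as (65536.0 + area - 1) * 1 / area instead of 65536.0 * 1 / area.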
BUG=none
TEST=none
R=tpsiaki@google.com
Review URL: https://webrtc-codereview.appspot.com/3599004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@847 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
67a0987dd9
commit
c2a889eb55
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 846
|
||||
Version: 847
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 846
|
||||
#define LIBYUV_VERSION 847
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -5090,19 +5090,20 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
|
||||
int width, int area, uint8* dst,
|
||||
int count) {
|
||||
asm volatile (
|
||||
"movd %5,%%xmm4 \n"
|
||||
"cvtdq2ps %%xmm4,%%xmm4 \n"
|
||||
"rcpss %%xmm4,%%xmm4 \n"
|
||||
"movd %5,%%xmm5 \n"
|
||||
"cvtdq2ps %%xmm5,%%xmm5 \n"
|
||||
"rcpss %%xmm5,%%xmm4 \n"
|
||||
"pshufd $0x0,%%xmm4,%%xmm4 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jl 49f \n"
|
||||
"cmpl $0x80,%5 \n"
|
||||
"ja 40f \n"
|
||||
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrld $0x1f,%%xmm5 \n"
|
||||
"pslld $0x10,%%xmm5 \n"
|
||||
"cvtdq2ps %%xmm5,%%xmm5 \n"
|
||||
"pshufd $0x0,%%xmm5,%%xmm5 \n"
|
||||
"pcmpeqb %%xmm6,%%xmm6 \n"
|
||||
"psrld $0x10,%%xmm6 \n"
|
||||
"cvtdq2ps %%xmm6,%%xmm6 \n"
|
||||
"addps %%xmm6,%%xmm5 \n"
|
||||
"mulps %%xmm4,%%xmm5 \n"
|
||||
"cvtps2dq %%xmm5,%%xmm5 \n"
|
||||
"packssdw %%xmm5,%%xmm5 \n"
|
||||
@ -5222,7 +5223,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
|
||||
, "r14"
|
||||
#endif
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
@ -5763,11 +5763,11 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
|
||||
mov eax, topleft // eax topleft
|
||||
mov esi, botleft // esi botleft
|
||||
mov edx, width
|
||||
movd xmm4, area
|
||||
movd xmm5, area
|
||||
mov edi, dst
|
||||
mov ecx, count
|
||||
cvtdq2ps xmm4, xmm4
|
||||
rcpss xmm4, xmm4 // 1.0f / area
|
||||
cvtdq2ps xmm5, xmm5
|
||||
rcpss xmm4, xmm5 // 1.0f / area
|
||||
pshufd xmm4, xmm4, 0
|
||||
sub ecx, 4
|
||||
jl l4b
|
||||
@ -5775,13 +5775,14 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
|
||||
cmp area, 128 // 128 pixels will not overflow 15 bits.
|
||||
ja l4
|
||||
|
||||
pcmpeqb xmm5, xmm5 // constant of 65536.0
|
||||
psrld xmm5, 31
|
||||
pslld xmm5, 16
|
||||
cvtdq2ps xmm5, xmm5
|
||||
mulps xmm5, xmm4 // 65536.0 * 1 / area
|
||||
pshufd xmm5, xmm5, 0 // area
|
||||
pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0
|
||||
psrld xmm6, 16
|
||||
cvtdq2ps xmm6, xmm6
|
||||
addps xmm5, xmm6 // (65536.0 + area - 1)
|
||||
mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area
|
||||
cvtps2dq xmm5, xmm5 // 0.16 fixed point
|
||||
packssdw xmm5, xmm5
|
||||
packssdw xmm5, xmm5 // 16 bit shorts
|
||||
|
||||
// 4 pixel loop small blocks.
|
||||
align 4
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user