mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
polynomial sse2 do 2 pixels at a time.
BUG=265 TEST=*Poly* R=changjun.yang@intel.com Review URL: https://webrtc-codereview.appspot.com/2195004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@782 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
5442018d64
commit
c3c06ec328
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 781
|
||||
Version: 782
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 781
|
||||
#define LIBYUV_VERSION 782
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -2052,7 +2052,7 @@ int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, const float* poly,
|
||||
int width) = ARGBPolynomialRow_C;
|
||||
#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
|
||||
ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -6774,42 +6774,53 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb, const float* poly,
|
||||
int width) {
|
||||
__asm {
|
||||
mov eax, [esp + 12] /* poly */
|
||||
movdqu xmm4, [eax]
|
||||
movdqu xmm5, [eax + 16]
|
||||
movdqu xmm6, [eax + 32]
|
||||
movdqu xmm7, [eax + 48]
|
||||
|
||||
mov eax, [esp + 4] /* src_argb */
|
||||
mov edx, [esp + 8] /* dst_argb */
|
||||
mov ecx, [esp + 16] /* width */
|
||||
push esi
|
||||
mov eax, [esp + 4 + 4] /* src_argb */
|
||||
mov edx, [esp + 4 + 8] /* dst_argb */
|
||||
mov esi, [esp + 4 + 12] /* poly */
|
||||
mov ecx, [esp + 4 + 16] /* width */
|
||||
pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
// (slow) vpmovzxbd xmm0, dword ptr [eax] // BGRA pixel
|
||||
movd xmm0, [eax] // BGRA
|
||||
lea eax, [eax + 4]
|
||||
// (slow) pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
|
||||
movq xmm0, qword ptr [eax] // BGRABGRA
|
||||
lea eax, [eax + 8]
|
||||
punpcklbw xmm0, xmm3
|
||||
punpcklwd xmm0, xmm3
|
||||
movdqa xmm4, xmm0
|
||||
punpcklwd xmm0, xmm3 // pixel 0
|
||||
punpckhwd xmm4, xmm3 // pixel 1
|
||||
cvtdq2ps xmm0, xmm0 // 4 floats
|
||||
cvtdq2ps xmm4, xmm4
|
||||
movdqa xmm1, xmm0 // X
|
||||
mulps xmm0, xmm5 // C1 * X
|
||||
addps xmm0, xmm4 // result = C0 + C1 * X
|
||||
movdqa xmm5, xmm4
|
||||
mulps xmm0, [esi + 16] // C1 * X
|
||||
mulps xmm4, [esi + 16]
|
||||
addps xmm0, [esi] // result = C0 + C1 * X
|
||||
addps xmm4, [esi]
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm6, xmm5
|
||||
mulps xmm2, xmm1 // X * X
|
||||
mulps xmm6, xmm5
|
||||
mulps xmm1, xmm2 // X * X * X
|
||||
mulps xmm2, xmm6 // C2 * X * X
|
||||
mulps xmm1, xmm7 // C3 * X * X * X
|
||||
mulps xmm5, xmm6
|
||||
mulps xmm2, [esi + 32] // C2 * X * X
|
||||
mulps xmm6, [esi + 32]
|
||||
mulps xmm1, [esi + 48] // C3 * X * X * X
|
||||
mulps xmm5, [esi + 48]
|
||||
addps xmm0, xmm2 // result += C2 * X * X
|
||||
addps xmm4, xmm6
|
||||
addps xmm0, xmm1 // result += C3 * X * X * X
|
||||
addps xmm4, xmm5
|
||||
cvttps2dq xmm0, xmm0
|
||||
cvttps2dq xmm4, xmm4
|
||||
packuswb xmm0, xmm4
|
||||
packuswb xmm0, xmm0
|
||||
packuswb xmm0, xmm0
|
||||
sub ecx, 1
|
||||
movd [edx], xmm0
|
||||
lea edx, [edx + 4]
|
||||
sub ecx, 2
|
||||
movq qword ptr [edx], xmm0
|
||||
lea edx, [edx + 8]
|
||||
jg convertloop
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
@ -1660,7 +1660,7 @@ TEST_F(libyuvTest, TestARGBPolynomial) {
|
||||
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
|
||||
SIMD_ALIGNED(uint8 dst_pixels[1280][4]);
|
||||
|
||||
static const float kWarmifyPolynomial[16] = {
|
||||
SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
|
||||
0.94230f, -3.03300f, -2.92500f, 0.f, // C0
|
||||
0.584500f, 1.112000f, 1.535000f, 1.f, // C1 x
|
||||
0.001313f, -0.002503f, -0.004496f, 0.f, // C2 x * x
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user