mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
SSE2 bilinear fix for posix.
BUG=177 TEST=none Review URL: https://webrtc-codereview.appspot.com/1061004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@548 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
0f00506af7
commit
70b4928158
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 547
|
Version: 548
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 547
|
#define LIBYUV_VERSION 548
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -926,13 +926,14 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef SSE2_DISABLED
|
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
|
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
|
||||||
// Normal formula for bilinear interpolation is:
|
// Normal formula for bilinear interpolation is:
|
||||||
// source_y_fraction * row1 + (1 - source_y_fraction) * row0
|
// source_y_fraction * row1 + (1 - source_y_fraction) * row0
|
||||||
// SSE2 version using a single multiply of the difference:
|
// SSE2 version using a single multiply of the difference:
|
||||||
// source_y_fraction * (row1 - row0) + row0
|
// source_y_fraction * (row1 - row0) + row0
|
||||||
#define HAS_SCALEFILTERROWS_SSE2_DISABLED
|
// TODO(fbarchard): Specialize same as SSSE3.
|
||||||
|
|
||||||
|
#define HAS_SCALEFILTERROWS_SSE2
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride, int dst_width,
|
ptrdiff_t src_stride, int dst_width,
|
||||||
@ -948,13 +949,15 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
sub edi, esi
|
sub edi, esi
|
||||||
cmp eax, 0
|
cmp eax, 0
|
||||||
je xloop1
|
je xloop1
|
||||||
cmp eax, 128
|
cmp eax, 128 // 50%?
|
||||||
je xloop2
|
je xloop2
|
||||||
|
|
||||||
movd xmm5, eax // xmm5 = y fraction
|
movd xmm5, eax // xmm5 = y fraction
|
||||||
punpcklbw xmm5, xmm5
|
punpcklbw xmm5, xmm5
|
||||||
|
psrlw xmm5, 1
|
||||||
punpcklwd xmm5, xmm5
|
punpcklwd xmm5, xmm5
|
||||||
pshufd xmm5, xmm5, 0
|
punpckldq xmm5, xmm5
|
||||||
|
punpcklqdq xmm5, xmm5
|
||||||
pxor xmm4, xmm4
|
pxor xmm4, xmm4
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
@ -969,6 +972,8 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
punpckhbw xmm1, xmm4
|
punpckhbw xmm1, xmm4
|
||||||
psubw xmm2, xmm0 // row1 - row0
|
psubw xmm2, xmm0 // row1 - row0
|
||||||
psubw xmm3, xmm1
|
psubw xmm3, xmm1
|
||||||
|
paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
|
||||||
|
paddw xmm3, xmm3
|
||||||
pmulhw xmm2, xmm5 // scale diff
|
pmulhw xmm2, xmm5 // scale diff
|
||||||
pmulhw xmm3, xmm5
|
pmulhw xmm3, xmm5
|
||||||
paddw xmm0, xmm2 // sum rows
|
paddw xmm0, xmm2 // sum rows
|
||||||
@ -1021,7 +1026,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // SSE2_DISABLED
|
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
|
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
|
||||||
#define HAS_SCALEFILTERROWS_SSSE3
|
#define HAS_SCALEFILTERROWS_SSSE3
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
@ -1933,9 +1938,9 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef SSE2_DISABLED
|
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
|
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
|
||||||
#define HAS_SCALEFILTERROWS_SSE2_DISABLED
|
// For more info see comment above ScaleFilterRows_SSE2 for MSVC++
|
||||||
|
#define HAS_SCALEFILTERROWS_SSE2
|
||||||
static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
||||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
int dst_width, int source_y_fraction) {
|
int dst_width, int source_y_fraction) {
|
||||||
@ -1945,10 +1950,13 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
|||||||
"je 2f \n"
|
"je 2f \n"
|
||||||
"cmp $0x80,%3 \n"
|
"cmp $0x80,%3 \n"
|
||||||
"je 3f \n"
|
"je 3f \n"
|
||||||
|
|
||||||
"movd %3,%%xmm5 \n"
|
"movd %3,%%xmm5 \n"
|
||||||
"punpcklbw %%xmm5,%%xmm5 \n"
|
"punpcklbw %%xmm5,%%xmm5 \n"
|
||||||
|
"psrlw $0x1,%%xmm5 \n"
|
||||||
"punpcklwd %%xmm5,%%xmm5 \n"
|
"punpcklwd %%xmm5,%%xmm5 \n"
|
||||||
"pshufd $0x0,%%xmm5,%%xmm5 \n"
|
"punpckldq %%xmm5,%%xmm5 \n"
|
||||||
|
"punpcklqdq %%xmm5,%%xmm5 \n"
|
||||||
"pxor %%xmm4,%%xmm4 \n"
|
"pxor %%xmm4,%%xmm4 \n"
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -1962,6 +1970,8 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
|||||||
"punpckhbw %%xmm4,%%xmm1 \n"
|
"punpckhbw %%xmm4,%%xmm1 \n"
|
||||||
"psubw %%xmm0,%%xmm2 \n"
|
"psubw %%xmm0,%%xmm2 \n"
|
||||||
"psubw %%xmm1,%%xmm3 \n"
|
"psubw %%xmm1,%%xmm3 \n"
|
||||||
|
"paddw %%xmm2,%%xmm2 \n"
|
||||||
|
"paddw %%xmm3,%%xmm3 \n"
|
||||||
"pmulhw %%xmm5,%%xmm2 \n"
|
"pmulhw %%xmm5,%%xmm2 \n"
|
||||||
"pmulhw %%xmm5,%%xmm3 \n"
|
"pmulhw %%xmm5,%%xmm3 \n"
|
||||||
"paddw %%xmm2,%%xmm0 \n"
|
"paddw %%xmm2,%%xmm0 \n"
|
||||||
@ -1996,16 +2006,15 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
|||||||
"movdqa %%xmm0,(%1,%0,1) \n"
|
"movdqa %%xmm0,(%1,%0,1) \n"
|
||||||
: "+r"(dst_ptr), // %0
|
: "+r"(dst_ptr), // %0
|
||||||
"+r"(src_ptr), // %1
|
"+r"(src_ptr), // %1
|
||||||
"+r"(dst_width), // %2
|
"+r"(dst_width) // %2
|
||||||
"+r"(source_y_fraction) // %3
|
: "r"(source_y_fraction), // %3
|
||||||
: "r"(static_cast<intptr_t>(src_stride)) // %4
|
"r"(static_cast<intptr_t>(src_stride)) // %4
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif // SSE2_DISABLED
|
|
||||||
|
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
|
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
|
||||||
#define HAS_SCALEFILTERROWS_SSSE3
|
#define HAS_SCALEFILTERROWS_SSSE3
|
||||||
|
|||||||
@ -716,6 +716,7 @@ TESTINTERPOLATE(64)
|
|||||||
TESTINTERPOLATE(128)
|
TESTINTERPOLATE(128)
|
||||||
TESTINTERPOLATE(192)
|
TESTINTERPOLATE(192)
|
||||||
TESTINTERPOLATE(255)
|
TESTINTERPOLATE(255)
|
||||||
|
TESTINTERPOLATE(85)
|
||||||
|
|
||||||
static int TestBlend(int width, int height, int benchmark_iterations,
|
static int TestBlend(int width, int height, int benchmark_iterations,
|
||||||
int invert, int off) {
|
int invert, int off) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user