mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-11 04:39:53 +08:00
compare SumSquareError_SSE2 ported to gcc
BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/279005
git-svn-id: http://libyuv.googlecode.com/svn/trunk@79 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
2430e04e0a
commit
4cf70bd6db
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 78
|
Version: 79
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -18,7 +18,8 @@
|
|||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
#if defined(__ARM_NEON__) && !defined(COVERAGE_ENABLED)
|
#if defined(__ARM_NEON__) && \
|
||||||
|
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||||
#define HAS_SUMSQUAREERROR_NEON
|
#define HAS_SUMSQUAREERROR_NEON
|
||||||
|
|
||||||
static uint32 SumSquareError_NEON(const uint8* src_a,
|
static uint32 SumSquareError_NEON(const uint8* src_a,
|
||||||
@ -58,10 +59,8 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
|
|||||||
return sse;
|
return sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) \
|
#elif defined(WIN32) && \
|
||||||
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||||
|
|
||||||
#if defined(WIN32) && !defined(COVERAGE_ENABLED)
|
|
||||||
#define HAS_SUMSQUAREERROR_SSE2
|
#define HAS_SUMSQUAREERROR_SSE2
|
||||||
__declspec(naked)
|
__declspec(naked)
|
||||||
static uint32 SumSquareError_SSE2(const uint8* src_a,
|
static uint32 SumSquareError_SSE2(const uint8* src_a,
|
||||||
@ -103,41 +102,63 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
|
|||||||
|
|
||||||
#elif (defined(__x86_64__) || defined(__i386__)) && \
|
#elif (defined(__x86_64__) || defined(__i386__)) && \
|
||||||
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
|
||||||
// DISABLE
|
#define HAS_SUMSQUAREERROR_SSE2
|
||||||
//#define HAS_SUMSQUAREERROR_SSE2
|
|
||||||
// DISABLE
|
|
||||||
#if HAS_SUMSQUAREERROR_SSE2
|
|
||||||
static uint32 SumSquareError_SSE2(const uint8* src_a,
|
static uint32 SumSquareError_SSE2(const uint8* src_a,
|
||||||
const uint8* src_b, int count) {
|
const uint8* src_b, int count) {
|
||||||
volatile uint32 sse;
|
uint32 sse;
|
||||||
asm volatile (
|
asm volatile (
|
||||||
" \n"
|
"pxor %%xmm0,%%xmm0 \n"
|
||||||
|
"pxor %%xmm5,%%xmm5 \n"
|
||||||
|
"sub %0,%1 \n"
|
||||||
|
|
||||||
|
"1: \n"
|
||||||
|
"movdqa (%0),%%xmm1 \n"
|
||||||
|
"movdqa (%0,%1,1),%%xmm2 \n"
|
||||||
|
"lea 0x10(%0),%0 \n"
|
||||||
|
"movdqa %%xmm1,%%xmm3 \n"
|
||||||
|
"psubusb %%xmm2,%%xmm1 \n"
|
||||||
|
"psubusb %%xmm3,%%xmm2 \n"
|
||||||
|
"por %%xmm2,%%xmm1 \n"
|
||||||
|
"movdqa %%xmm1,%%xmm2 \n"
|
||||||
|
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||||
|
"punpckhbw %%xmm5,%%xmm2 \n"
|
||||||
|
"pmaddwd %%xmm1,%%xmm1 \n"
|
||||||
|
"pmaddwd %%xmm2,%%xmm2 \n"
|
||||||
|
"paddd %%xmm1,%%xmm0 \n"
|
||||||
|
"paddd %%xmm2,%%xmm0 \n"
|
||||||
|
"sub $0x10,%2 \n"
|
||||||
|
"ja 1b \n"
|
||||||
|
|
||||||
|
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
||||||
|
"paddd %%xmm1,%%xmm0 \n"
|
||||||
|
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
||||||
|
"paddd %%xmm1,%%xmm0 \n"
|
||||||
|
"movd %%xmm0,%3 \n"
|
||||||
|
|
||||||
: "+r"(src_a), // %0
|
: "+r"(src_a), // %0
|
||||||
"+r"(src_b), // %1
|
"+r"(src_b), // %1
|
||||||
"+r"(count), // %2
|
"+r"(count), // %2
|
||||||
"=r"(sse) // %3
|
"=g"(sse) // %3
|
||||||
:
|
:
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
, "xmm0", "xmm1", "xmm2", "xmm5"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
return sse;
|
return sse;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static uint32 SumSquareError_C(const uint8* src_a,
|
static uint32 SumSquareError_C(const uint8* src_a,
|
||||||
const uint8* src_b, int count) {
|
const uint8* src_b, int count) {
|
||||||
uint32 udiff = 0u;
|
uint32 sse = 0u;
|
||||||
for (int x = 0; x < count; ++x) {
|
for (int x = 0; x < count; ++x) {
|
||||||
int diff = src_a[0] - src_b[0];
|
int diff = src_a[0] - src_b[0];
|
||||||
udiff += static_cast<uint32>(diff * diff);
|
sse += static_cast<uint32>(diff * diff);
|
||||||
src_a += 1;
|
src_a += 1;
|
||||||
src_b += 1;
|
src_b += 1;
|
||||||
}
|
}
|
||||||
return udiff;
|
return sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64 ComputeSumSquareError(const uint8* src_a,
|
uint64 ComputeSumSquareError(const uint8* src_a,
|
||||||
@ -157,22 +178,25 @@ uint64 ComputeSumSquareError(const uint8* src_a,
|
|||||||
{
|
{
|
||||||
SumSquareError = SumSquareError_C;
|
SumSquareError = SumSquareError_C;
|
||||||
}
|
}
|
||||||
const int kBlockSize = 4096;
|
const int kBlockSize = 32768;
|
||||||
uint64 diff = 0;
|
uint64 sse = 0;
|
||||||
while (count >= kBlockSize) {
|
while (count >= kBlockSize) {
|
||||||
diff += SumSquareError(src_a, src_b, kBlockSize);
|
sse += SumSquareError(src_a, src_b, kBlockSize);
|
||||||
src_a += kBlockSize;
|
src_a += kBlockSize;
|
||||||
src_b += kBlockSize;
|
src_b += kBlockSize;
|
||||||
count -= kBlockSize;
|
count -= kBlockSize;
|
||||||
}
|
}
|
||||||
if (count > 0) {
|
int remainder = count & ~15;
|
||||||
if (count % 16 == 0) {
|
if (remainder) {
|
||||||
diff += static_cast<uint64>(SumSquareError(src_a, src_b, count));
|
sse += SumSquareError(src_a, src_b, remainder);
|
||||||
} else {
|
src_a += remainder;
|
||||||
diff += static_cast<uint64>(SumSquareError_C(src_a, src_b, count));
|
src_b += remainder;
|
||||||
}
|
count -= remainder;
|
||||||
}
|
}
|
||||||
return diff;
|
if (count) {
|
||||||
|
sse += SumSquareError_C(src_a, src_b, count);
|
||||||
|
}
|
||||||
|
return sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||||
@ -192,7 +216,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
|||||||
|
|
||||||
uint64 sse = 0;
|
uint64 sse = 0;
|
||||||
for (int h = 0; h < height; ++h) {
|
for (int h = 0; h < height; ++h) {
|
||||||
sse += static_cast<uint64>(SumSquareError(src_a, src_b, width));
|
sse += SumSquareError(src_a, src_b, width);
|
||||||
src_a += stride_a;
|
src_a += stride_a;
|
||||||
src_b += stride_b;
|
src_b += stride_b;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user