mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Porting parts of compare to c89
BUG=303 TESTED=try bots still build, gcc and vc direct for c testing. R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/6739004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@956 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
53a7923b15
commit
167d5d1c2f
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 955
|
||||
Version: 956
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 955
|
||||
#define LIBYUV_VERSION 956
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -46,6 +46,8 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
|
||||
// hash seed of 5381 recommended.
|
||||
LIBYUV_API
|
||||
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
const int kBlockSize = 1 << 15; // 32768;
|
||||
int remainder = (int)(count) & ~15;
|
||||
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
|
||||
#if defined(HAS_HASHDJB2_SSE41)
|
||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||
@ -58,13 +60,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
}
|
||||
#endif
|
||||
|
||||
const int kBlockSize = 1 << 15; // 32768;
|
||||
while (count >= (uint64)(kBlockSize)) {
|
||||
seed = HashDjb2_SSE(src, kBlockSize, seed);
|
||||
src += kBlockSize;
|
||||
count -= kBlockSize;
|
||||
}
|
||||
int remainder = (int)(count) & ~15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_SSE(src, remainder, seed);
|
||||
src += remainder;
|
||||
@ -98,6 +98,13 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
int count) {
|
||||
// SumSquareError returns values 0 to 65535 for each squared difference.
|
||||
// Up to 65536 of those can be summed and remain within a uint32.
|
||||
// After each block of 65536 pixels, accumulate into a uint64.
|
||||
const int kBlockSize = 65536;
|
||||
int remainder = count & (kBlockSize - 1) & ~31;
|
||||
uint64 sse = 0;
|
||||
int i;
|
||||
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
|
||||
SumSquareError_C;
|
||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||
@ -118,20 +125,14 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
SumSquareError = SumSquareError_AVX2;
|
||||
}
|
||||
#endif
|
||||
// SumSquareError returns values 0 to 65535 for each squared difference.
|
||||
// Up to 65536 of those can be summed and remain within a uint32.
|
||||
// After each block of 65536 pixels, accumulate into a uint64.
|
||||
const int kBlockSize = 65536;
|
||||
uint64 sse = 0;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for reduction(+: sse)
|
||||
#endif
|
||||
for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
|
||||
}
|
||||
src_a += count & ~(kBlockSize - 1);
|
||||
src_b += count & ~(kBlockSize - 1);
|
||||
int remainder = count & (kBlockSize - 1) & ~31;
|
||||
if (remainder) {
|
||||
sse += SumSquareError(src_a, src_b, remainder);
|
||||
src_a += remainder;
|
||||
@ -148,6 +149,8 @@ LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
uint64 sse = 0;
|
||||
int h;
|
||||
// Coalesce rows.
|
||||
if (stride_a == width &&
|
||||
stride_b == width) {
|
||||
@ -155,8 +158,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
height = 1;
|
||||
stride_a = stride_b = 0;
|
||||
}
|
||||
uint64 sse = 0;
|
||||
for (int h = 0; h < height; ++h) {
|
||||
for (h = 0; h < height; ++h) {
|
||||
sse += ComputeSumSquareError(src_a, src_b, width);
|
||||
src_a += stride_a;
|
||||
src_b += stride_b;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user