mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-09 03:06:44 +08:00
Move compare low levels into their own files, for consistency with NEON.
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/921004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@429 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
0a6b7a6eb7
commit
3f467451cf
@ -65,7 +65,10 @@
|
|||||||
|
|
||||||
# sources.
|
# sources.
|
||||||
'source/compare.cc',
|
'source/compare.cc',
|
||||||
|
'source/compare_common.cc',
|
||||||
'source/compare_neon.cc',
|
'source/compare_neon.cc',
|
||||||
|
'source/compare_posix.cc',
|
||||||
|
'source/compare_win.cc',
|
||||||
'source/convert.cc',
|
'source/convert.cc',
|
||||||
'source/convert_argb.cc',
|
'source/convert_argb.cc',
|
||||||
'source/convert_from.cc',
|
'source/convert_from.cc',
|
||||||
@ -78,13 +81,13 @@
|
|||||||
'source/rotate_argb.cc',
|
'source/rotate_argb.cc',
|
||||||
'source/rotate_neon.cc',
|
'source/rotate_neon.cc',
|
||||||
'source/row_common.cc',
|
'source/row_common.cc',
|
||||||
'source/row_neon.cc',
|
|
||||||
'source/row_mips.cc',
|
'source/row_mips.cc',
|
||||||
|
'source/row_neon.cc',
|
||||||
'source/row_posix.cc',
|
'source/row_posix.cc',
|
||||||
'source/row_win.cc',
|
'source/row_win.cc',
|
||||||
'source/scale.cc',
|
'source/scale.cc',
|
||||||
'source/scale_neon.cc',
|
|
||||||
'source/scale_argb.cc',
|
'source/scale_argb.cc',
|
||||||
|
'source/scale_neon.cc',
|
||||||
'source/video_common.cc',
|
'source/video_common.cc',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|||||||
@ -244,109 +244,15 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
|||||||
return seed;
|
return seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
|
||||||
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||||
#define HAS_SUMSQUAREERROR_NEON
|
#define HAS_SUMSQUAREERROR_NEON
|
||||||
|
|
||||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
|
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
|
||||||
|
#elif !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
|
||||||
#elif !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
defined(__x86_64__) || defined(__i386__))
|
||||||
#define HAS_SUMSQUAREERROR_SSE2
|
#define HAS_SUMSQUAREERROR_SSE2
|
||||||
__declspec(naked) __declspec(align(16))
|
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
|
||||||
static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
|
|
||||||
int count) {
|
|
||||||
__asm {
|
|
||||||
mov eax, [esp + 4] // src_a
|
|
||||||
mov edx, [esp + 8] // src_b
|
|
||||||
mov ecx, [esp + 12] // count
|
|
||||||
pxor xmm0, xmm0
|
|
||||||
pxor xmm5, xmm5
|
|
||||||
sub edx, eax
|
|
||||||
|
|
||||||
align 16
|
|
||||||
wloop:
|
|
||||||
movdqa xmm1, [eax]
|
|
||||||
movdqa xmm2, [eax + edx]
|
|
||||||
lea eax, [eax + 16]
|
|
||||||
sub ecx, 16
|
|
||||||
movdqa xmm3, xmm1 // abs trick
|
|
||||||
psubusb xmm1, xmm2
|
|
||||||
psubusb xmm2, xmm3
|
|
||||||
por xmm1, xmm2
|
|
||||||
movdqa xmm2, xmm1
|
|
||||||
punpcklbw xmm1, xmm5
|
|
||||||
punpckhbw xmm2, xmm5
|
|
||||||
pmaddwd xmm1, xmm1
|
|
||||||
pmaddwd xmm2, xmm2
|
|
||||||
paddd xmm0, xmm1
|
|
||||||
paddd xmm0, xmm2
|
|
||||||
jg wloop
|
|
||||||
|
|
||||||
pshufd xmm1, xmm0, 0EEh
|
|
||||||
paddd xmm0, xmm1
|
|
||||||
pshufd xmm1, xmm0, 01h
|
|
||||||
paddd xmm0, xmm1
|
|
||||||
movd eax, xmm0
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
|
|
||||||
#define HAS_SUMSQUAREERROR_SSE2
|
|
||||||
static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
|
|
||||||
int count) {
|
|
||||||
uint32 sse;
|
|
||||||
asm volatile (
|
|
||||||
"pxor %%xmm0,%%xmm0 \n"
|
|
||||||
"pxor %%xmm5,%%xmm5 \n"
|
|
||||||
"sub %0,%1 \n"
|
|
||||||
".p2align 4 \n"
|
|
||||||
"1: \n"
|
|
||||||
"movdqa (%0),%%xmm1 \n"
|
|
||||||
"movdqa (%0,%1,1),%%xmm2 \n"
|
|
||||||
"lea 0x10(%0),%0 \n"
|
|
||||||
"sub $0x10,%2 \n"
|
|
||||||
"movdqa %%xmm1,%%xmm3 \n"
|
|
||||||
"psubusb %%xmm2,%%xmm1 \n"
|
|
||||||
"psubusb %%xmm3,%%xmm2 \n"
|
|
||||||
"por %%xmm2,%%xmm1 \n"
|
|
||||||
"movdqa %%xmm1,%%xmm2 \n"
|
|
||||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
|
||||||
"punpckhbw %%xmm5,%%xmm2 \n"
|
|
||||||
"pmaddwd %%xmm1,%%xmm1 \n"
|
|
||||||
"pmaddwd %%xmm2,%%xmm2 \n"
|
|
||||||
"paddd %%xmm1,%%xmm0 \n"
|
|
||||||
"paddd %%xmm2,%%xmm0 \n"
|
|
||||||
"jg 1b \n"
|
|
||||||
|
|
||||||
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
|
||||||
"paddd %%xmm1,%%xmm0 \n"
|
|
||||||
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
|
||||||
"paddd %%xmm1,%%xmm0 \n"
|
|
||||||
"movd %%xmm0,%3 \n"
|
|
||||||
|
|
||||||
: "+r"(src_a), // %0
|
|
||||||
"+r"(src_b), // %1
|
|
||||||
"+r"(count), // %2
|
|
||||||
"=g"(sse) // %3
|
|
||||||
:
|
|
||||||
: "memory", "cc"
|
|
||||||
#if defined(__SSE2__)
|
|
||||||
, "xmm0", "xmm1", "xmm2", "xmm5"
|
|
||||||
#endif
|
#endif
|
||||||
);
|
|
||||||
return sse;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b,
|
|
||||||
int count) {
|
|
||||||
uint32 sse = 0u;
|
|
||||||
for (int i = 0; i < count; ++i) {
|
|
||||||
int diff = src_a[i] - src_b[i];
|
|
||||||
sse += static_cast<uint32>(diff * diff);
|
|
||||||
}
|
|
||||||
return sse;
|
|
||||||
}
|
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||||
|
|||||||
31
source/compare_common.cc
Normal file
31
source/compare_common.cc
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libyuv/basic_types.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
namespace libyuv {
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Portable scalar implementation: returns the sum of the squared differences
// between corresponding bytes of src_a and src_b over the first count bytes.
// A count of zero or less produces 0. The total is accumulated in a uint32.
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
  uint32 total = 0u;
  const uint8* pa = src_a;
  const uint8* pb = src_b;
  int remaining = count;
  while (remaining > 0) {
    // Bytes are widened to int before subtracting so the difference is signed.
    const int delta = static_cast<int>(*pa) - static_cast<int>(*pb);
    total += static_cast<uint32>(delta * delta);
    ++pa;
    ++pb;
    --remaining;
  }
  return total;
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
} // namespace libyuv
|
||||||
|
#endif
|
||||||
|
|
||||||
70
source/compare_posix.cc
Normal file
70
source/compare_posix.cc
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libyuv/basic_types.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
namespace libyuv {
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
|
||||||
|
|
||||||
|
// Computes the sum of squared differences between the bytes of src_a and
// src_b with SSE2 (GCC-style inline assembly for x86 and x86-64).
//
// Each loop pass loads 16 bytes from both buffers with movdqa, an aligned
// load, so both pointers need 16-byte alignment. The loop body runs before
// count is tested and count drops by 16 per pass, so a count that is not a
// positive multiple of 16 makes the loop read beyond the first count bytes.
// NOTE(review): callers presumably guarantee both conditions -- confirm.
//
// Returns the 32-bit total of the four per-lane accumulators.
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
  uint32 sse;
  asm volatile (
    // xmm0 accumulates four 32-bit partial sums; xmm5 stays zero and supplies
    // the high half when bytes are widened to 16-bit words.
    "pxor %%xmm0,%%xmm0 \n"
    "pxor %%xmm5,%%xmm5 \n"
    // Turn src_b into an offset from src_a so one advancing pointer addresses
    // both buffers.
    "sub %0,%1 \n"
    ".p2align 4 \n"
    "1: \n"
    "movdqa (%0),%%xmm1 \n"
    "movdqa (%0,%1,1),%%xmm2 \n"
    "lea 0x10(%0),%0 \n"
    // Sets the flags tested by the jg at the bottom of the loop; the SSE2
    // instructions in between leave the flags untouched.
    "sub $0x10,%2 \n"
    // |a - b| per byte: saturating subtract in both directions, then OR.
    "movdqa %%xmm1,%%xmm3 \n"
    "psubusb %%xmm2,%%xmm1 \n"
    "psubusb %%xmm3,%%xmm2 \n"
    "por %%xmm2,%%xmm1 \n"
    // Widen the 16 absolute differences to 16-bit words (low and high halves).
    "movdqa %%xmm1,%%xmm2 \n"
    "punpcklbw %%xmm5,%%xmm1 \n"
    "punpckhbw %%xmm5,%%xmm2 \n"
    // pmaddwd of a register with itself squares each word and adds adjacent
    // pairs, giving four 32-bit sums per register.
    "pmaddwd %%xmm1,%%xmm1 \n"
    "pmaddwd %%xmm2,%%xmm2 \n"
    "paddd %%xmm1,%%xmm0 \n"
    "paddd %%xmm2,%%xmm0 \n"
    "jg 1b \n"

    // Horizontal add of the four 32-bit lanes of xmm0 into lane 0.
    "pshufd $0xee,%%xmm0,%%xmm1 \n"
    "paddd %%xmm1,%%xmm0 \n"
    "pshufd $0x1,%%xmm0,%%xmm1 \n"
    "paddd %%xmm1,%%xmm0 \n"
    "movd %%xmm0,%3 \n"

  : "+r"(src_a),      // %0
    "+r"(src_b),      // %1
    "+r"(count),      // %2
    "=g"(sse)         // %3
  :
  : "memory", "cc"
#if defined(__SSE2__)
    // xmm3 is overwritten with the copy of src_a's bytes used for the reverse
    // subtract, so it must be listed with the other scratch registers.
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
  );
  return sse;
}
|
||||||
|
|
||||||
|
#endif // defined(__x86_64__) || defined(__i386__)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
} // namespace libyuv
|
||||||
|
#endif
|
||||||
|
|
||||||
64
source/compare_win.cc
Normal file
64
source/compare_win.cc
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libyuv/basic_types.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
namespace libyuv {
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||||
|
|
||||||
|
// Sum of squared differences between the bytes of src_a and src_b using
// SSE2 (MSVC inline assembly).
// The function is declared naked, so the body does its own argument access
// and return: the three arguments are read from the stack at [esp + 4],
// [esp + 8] and [esp + 12], and the 32-bit result is left in eax before the
// explicit ret.
// Each loop pass loads 16 bytes per buffer with movdqa (an aligned load) and
// the loop body runs before count is tested.
// NOTE(review): callers presumably pass 16-byte aligned pointers and a
// positive multiple of 16 for count -- confirm.
__declspec(naked) __declspec(align(16))
|
||||||
|
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||||
|
__asm {
|
||||||
|
mov eax, [esp + 4] // eax = src_a
|
||||||
|
mov edx, [esp + 8] // edx = src_b
|
||||||
|
mov ecx, [esp + 12] // ecx = count (bytes remaining)
|
||||||
|
pxor xmm0, xmm0 // xmm0 = four 32-bit partial sums, start at zero
|
||||||
|
pxor xmm5, xmm5 // xmm5 = zero, high half when widening bytes to words
|
||||||
|
sub edx, eax // edx = src_b - src_a, so [eax + edx] addresses src_b
|
||||||
|
|
||||||
|
align 16
|
||||||
|
wloop:
|
||||||
|
movdqa xmm1, [eax] // 16 bytes of src_a (aligned load)
|
||||||
|
movdqa xmm2, [eax + edx] // matching 16 bytes of src_b (aligned load)
|
||||||
|
lea eax, [eax + 16] // advance the shared pointer
|
||||||
|
sub ecx, 16 // sets the flags tested by jg below; the SSE2 ops in between leave them alone
|
||||||
|
movdqa xmm3, xmm1 // keep a copy of a for the reverse subtract (abs-difference trick)
|
||||||
|
psubusb xmm1, xmm2 // saturating a - b
|
||||||
|
psubusb xmm2, xmm3 // saturating b - a
|
||||||
|
por xmm1, xmm2 // |a - b| per byte
|
||||||
|
movdqa xmm2, xmm1
|
||||||
|
punpcklbw xmm1, xmm5 // low 8 differences widened to 16-bit words
|
||||||
|
punpckhbw xmm2, xmm5 // high 8 differences widened to 16-bit words
|
||||||
|
pmaddwd xmm1, xmm1 // square each word and add adjacent pairs into 32-bit lanes
|
||||||
|
pmaddwd xmm2, xmm2
|
||||||
|
paddd xmm0, xmm1 // accumulate
|
||||||
|
paddd xmm0, xmm2
|
||||||
|
jg wloop // loop while the remaining count is still positive
|
||||||
|
|
||||||
|
pshufd xmm1, xmm0, 0EEh // copy the upper two lanes down
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
pshufd xmm1, xmm0, 01h // copy lane 1 into lane 0
|
||||||
|
paddd xmm0, xmm1 // lane 0 now holds the total of all four lanes
|
||||||
|
movd eax, xmm0 // return value in eax
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _M_IX86
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
} // namespace libyuv
|
||||||
|
#endif
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user