diff --git a/libyuv.gyp b/libyuv.gyp index db21f8a0f..107a2b4e2 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -65,7 +65,10 @@ # sources. 'source/compare.cc', + 'source/compare_common.cc', 'source/compare_neon.cc', + 'source/compare_posix.cc', + 'source/compare_win.cc', 'source/convert.cc', 'source/convert_argb.cc', 'source/convert_from.cc', @@ -78,13 +81,13 @@ 'source/rotate_argb.cc', 'source/rotate_neon.cc', 'source/row_common.cc', - 'source/row_neon.cc', 'source/row_mips.cc', + 'source/row_neon.cc', 'source/row_posix.cc', 'source/row_win.cc', 'source/scale.cc', - 'source/scale_neon.cc', 'source/scale_argb.cc', + 'source/scale_neon.cc', 'source/video_common.cc', ], }, diff --git a/source/compare.cc b/source/compare.cc index bf4a7daed..667f7750c 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -244,109 +244,15 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { return seed; } +uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); #if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) #define HAS_SUMSQUAREERROR_NEON - uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); - -#elif !defined(YUV_DISABLE_ASM) && defined(_M_IX86) +#elif !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \ + defined(__x86_64__) || defined(__i386__)) #define HAS_SUMSQUAREERROR_SSE2 -__declspec(naked) __declspec(align(16)) -static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, - int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - pxor xmm0, xmm0 - pxor xmm5, xmm5 - sub edx, eax - - align 16 - wloop: - movdqa xmm1, [eax] - movdqa xmm2, [eax + edx] - lea eax, [eax + 16] - sub ecx, 16 - movdqa xmm3, xmm1 // abs trick - psubusb xmm1, xmm2 - psubusb xmm2, xmm3 - por xmm1, xmm2 - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm0, xmm1 - paddd xmm0, xmm2 - 
jg wloop - - pshufd xmm1, xmm0, 0EEh - paddd xmm0, xmm1 - pshufd xmm1, xmm0, 01h - paddd xmm0, xmm1 - movd eax, xmm0 - ret - } -} - -#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) -#define HAS_SUMSQUAREERROR_SSE2 -static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, - int count) { - uint32 sse; - asm volatile ( - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - "sub %0,%1 \n" - ".p2align 4 \n" - "1: \n" - "movdqa (%0),%%xmm1 \n" - "movdqa (%0,%1,1),%%xmm2 \n" - "lea 0x10(%0),%0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "jg 1b \n" - - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" - - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm5" +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); #endif - ); - return sse; -} -#endif - -static uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, - int count) { - uint32 sse = 0u; - for (int i = 0; i < count; ++i) { - int diff = src_a[i] - src_b[i]; - sse += static_cast<uint32>(diff * diff); - } - return sse; -} LIBYUV_API uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, diff --git a/source/compare_common.cc b/source/compare_common.cc new file mode 100644 index 000000000..9fa801ee0 --- /dev/null +++ b/source/compare_common.cc @@ -0,0 +1,31 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) { // portable version: sum of (src_a[i] - src_b[i])^2 over count bytes + uint32 sse = 0u; + for (int i = 0; i < count; ++i) { + int diff = src_a[i] - src_b[i]; // operands promote to int, so diff may be negative + sse += static_cast<uint32>(diff * diff); // the square is non-negative; accumulate as unsigned + } + return sse; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + diff --git a/source/compare_posix.cc b/source/compare_posix.cc new file mode 100644 index 000000000..0fca8ded4 --- /dev/null +++ b/source/compare_posix.cc @@ -0,0 +1,70 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + #include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) + +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { + uint32 sse; + asm volatile ( + "pxor %%xmm0,%%xmm0 \n" // xmm0 = four 32-bit partial sums, starting at zero + "pxor %%xmm5,%%xmm5 \n" // xmm5 = zero, used below to widen bytes to words + "sub %0,%1 \n" // %1 = src_b - src_a, so (%0,%1,1) addresses src_b + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm1 \n" + "movdqa (%0,%1,1),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "sub $0x10,%2 \n" // sets the flags that jg tests; nothing in between writes EFLAGS + "movdqa %%xmm1,%%xmm3 \n" // xmm3 is scratch for the abs-diff: sat(a - b) | sat(b - a) + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "jg 1b \n" + + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" // every xmm register the asm writes, including the xmm3 scratch +#endif + ); + return sse; +} + +#endif // defined(__x86_64__) || defined(__i386__) + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + diff --git a/source/compare_win.cc b/source/compare_win.cc new file mode 100644 index 000000000..ecbe65494 --- /dev/null +++ b/source/compare_win.cc @@ -0,0 +1,64 @@ +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + #include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) + +__declspec(naked) __declspec(align(16)) +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + pxor xmm0, xmm0 // xmm0 = four 32-bit partial sums, starting at zero + pxor xmm5, xmm5 // xmm5 = zero, used below to widen bytes to words + sub edx, eax // edx = src_b - src_a, so [eax + edx] addresses src_b + + align 16 + wloop: + movdqa xmm1, [eax] // 16 bytes of src_a (movdqa needs a 16-byte aligned address) + movdqa xmm2, [eax + edx] // 16 bytes of src_b (same alignment requirement) + lea eax, [eax + 16] + sub ecx, 16 // sets the flags that jg wloop tests; nothing in between writes EFLAGS + movdqa xmm3, xmm1 // abs trick: |a - b| = sat(a - b) | sat(b - a) + psubusb xmm1, xmm2 + psubusb xmm2, xmm3 + por xmm1, xmm2 // xmm1 = per-byte absolute difference + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm5 // low 8 bytes -> 8 words + punpckhbw xmm2, xmm5 // high 8 bytes -> 8 words + pmaddwd xmm1, xmm1 // square each word, add adjacent pairs into dwords + pmaddwd xmm2, xmm2 + paddd xmm0, xmm1 + paddd xmm0, xmm2 + jg wloop // repeat while the remaining count is > 0 + + pshufd xmm1, xmm0, 0EEh // horizontal add: fold the upper two dwords onto the lower two + paddd xmm0, xmm1 + pshufd xmm1, xmm0, 01h // then fold dword 1 onto dword 0 + paddd xmm0, xmm1 + movd eax, xmm0 // total goes back in eax + ret + } +} + +#endif // _M_IX86 + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif +