From 0d41aee26b1e6a01e7beb2197e0edff25085ef59 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 8 Aug 2013 23:52:34 +0000 Subject: [PATCH] Port compare functions to Nacl BUG=253 TEST=none R=nfullagar@google.com Review URL: https://webrtc-codereview.appspot.com/1998004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@752 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/compare.cc | 2 -- source/compare_posix.cc | 33 ++++++++++++++++++++------------- source/compare_win.cc | 4 ++-- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/README.chromium b/README.chromium index c6fba999b..e7ada653c 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 751 +Version: 752 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 86e756ce9..14a6c1b28 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 751 +#define LIBYUV_VERSION 752 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/compare.cc b/source/compare.cc index 93935b1f3..1759a9336 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -31,7 +31,6 @@ uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); // This module is for Visual C x86 #if !defined(LIBYUV_DISABLE_X86) && \ - !(defined(__native_client__) && defined(__x86_64__)) && \ (defined(_M_IX86) || \ (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))) #define HAS_HASHDJB2_SSE41 @@ -76,7 +75,6 @@ uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); #endif #if !defined(LIBYUV_DISABLE_X86) && \ - !(defined(__native_client__) && defined(__x86_64__)) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_SUMSQUAREERROR_SSE2 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); diff --git a/source/compare_posix.cc b/source/compare_posix.cc index b97a6eaa5..da6d9a160 100644 --- a/source/compare_posix.cc +++ b/source/compare_posix.cc @@ -16,21 +16,28 @@ namespace libyuv { extern "C" { #endif -#if !defined(LIBYUV_DISABLE_X86) && \ - !(defined(__native_client__) && defined(__x86_64__)) && \ - (defined(__x86_64__) || defined(__i386__)) +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) + +#if defined(__native_client__) && defined(__x86_64__) +#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" +#define MEMLEA(offset, base) #offset "(%q" #base ")" +#else +#define MEMACCESS(base) "(%" #base ")" +#define MEMLEA(offset, base) #offset "(%" #base ")" +#endif uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { uint32 sse; - asm volatile ( + asm volatile ( // NOLINT "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm5,%%xmm5 \n" "sub %0,%1 \n" ".p2align 4 \n" "1: \n" - "movdqa (%0),%%xmm1 \n" - "movdqa (%0,%1,1),%%xmm2 \n" - "lea 0x10(%0),%0 \n" + "movdqa "MEMACCESS(0)",%%xmm1 \n" + "lea "MEMLEA(0x10, 0)",%0 \n" + "movdqa "MEMACCESS(1)",%%xmm2 \n" + "lea "MEMLEA(0x10, 1)",%1 \n" "sub $0x10,%2 \n" "movdqa %%xmm1,%%xmm3 \n" "psubusb %%xmm2,%%xmm1 \n" @@ -60,14 +67,13 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif - ); + ); // NOLINT return sse; } #endif // defined(__x86_64__) || defined(__i386__) #if !defined(LIBYUV_DISABLE_X86) && \ - !(defined(__native_client__) && defined(__x86_64__)) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) #define HAS_HASHDJB2_SSE41 static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 @@ -98,14 +104,14 @@ static uvec32 kHashMul3 = { uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { uint32 hash; - asm volatile ( + asm volatile ( // NOLINT "movd %2,%%xmm0 \n" "pxor %%xmm7,%%xmm7 \n" "movdqa %4,%%xmm6 \n" ".p2align 4 \n" "1: \n" - "movdqu (%0),%%xmm1 \n" - "lea 0x10(%0),%0 \n" + "movdqu "MEMACCESS(0)",%%xmm1 \n" + "lea "MEMLEA(0x10, 0)",%0 \n" "pmulld %%xmm6,%%xmm0 \n" "movdqa %5,%%xmm5 \n" "movdqa %%xmm1,%%xmm2 \n" @@ -149,7 +155,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" #endif - ); + ); // NOLINT return hash; } #endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) @@ -158,3 +164,4 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { } // extern "C" } // namespace libyuv #endif + diff --git a/source/compare_win.cc b/source/compare_win.cc index fbd6e8787..dd2661073 100644 --- a/source/compare_win.cc +++ b/source/compare_win.cc @@ -26,13 +26,13 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { mov ecx, [esp + 12] // count pxor xmm0, xmm0 pxor xmm5, xmm5 - sub edx, eax align 16 wloop: movdqa xmm1, [eax] - movdqa xmm2, [eax + edx] lea eax, [eax + 16] + movdqa xmm2, [edx] + lea edx, [edx + 16] sub ecx, 16 movdqa xmm3, xmm1 // abs trick psubusb xmm1, xmm2