From f78509b3bef818ab9cbda152efc6a614aaa967a1 Mon Sep 17 00:00:00 2001 From: "nfullagar@google.com" Date: Wed, 14 Aug 2013 23:09:44 +0000 Subject: [PATCH] Port Interpolate SSE2 to NaCl BUG=253 TEST=libyuv_test,try bots R=fbarchard@google.com Review URL: https://webrtc-codereview.appspot.com/2013005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@762 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 4 +- include/libyuv/version.h | 2 +- source/row_posix.cc | 100 ++++++++++++++++++++++++--------------- 4 files changed, 66 insertions(+), 42 deletions(-) diff --git a/README.chromium b/README.chromium index 7127bfeb1..c3eff621b 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 761 +Version: 762 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index fecec16ad..b11f29055 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -53,6 +53,8 @@ extern "C" { #define HAS_ARGBSUBTRACTROW_SSE2 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 +#define HAS_INTERPOLATEROW_SSE2 +#define HAS_INTERPOLATEROW_SSSE3 // Conversions: #define HAS_ARGBSHUFFLEROW_SSSE3 @@ -63,7 +65,6 @@ extern "C" { #define HAS_COPYROW_X86 #define HAS_FIXEDDIV_X86 #define HAS_I400TOARGBROW_SSE2 -#define HAS_INTERPOLATEROW_SSSE3 #define HAS_SETROW_X86 #endif @@ -130,7 +131,6 @@ extern "C" { // Effects: #define HAS_ARGBUNATTENUATEROW_SSE2 -#define HAS_INTERPOLATEROW_SSE2 #define HAS_SOBELROW_SSE2 #define HAS_SOBELXROW_SSSE3 #define HAS_SOBELXYROW_SSE2 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index b8f507fba..9b955c700 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 761 +#define LIBYUV_VERSION 762 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git 
a/source/row_posix.cc b/source/row_posix.cc index e381753c5..190773113 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -5017,9 +5017,10 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, // General purpose row blend. ".p2align 4 \n" + BUNDLEALIGN "1: \n" - "movdqa (%1),%%xmm0 \n" - "movdqa (%1,%4,1),%%xmm2 \n" + "movdqa "MEMACCESS(1)",%%xmm0 \n" + MEMOPREG(movdqa,0x00,1,4,1,xmm2) // movdqa (%1,%4,1),%%xmm2 "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm3 \n" "punpcklbw %%xmm4,%%xmm2 \n" @@ -5036,56 +5037,64 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, "paddw %%xmm3,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqa %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" "jmp 99f \n" // Blend 25 / 75. ".p2align 4 \n" + BUNDLEALIGN "25: \n" - "movdqa (%1),%%xmm0 \n" - "movdqa (%1,%4,1),%%xmm1 \n" + "movdqa "MEMACCESS(1)",%%xmm0 \n" + MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1 "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqa %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 25b \n" "jmp 99f \n" // Blend 50 / 50. ".p2align 4 \n" + BUNDLEALIGN "50: \n" - "movdqa (%1),%%xmm0 \n" - "movdqa (%1,%4,1),%%xmm1 \n" + "movdqa "MEMACCESS(1)",%%xmm0 \n" + MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1 "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqa %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 50b \n" "jmp 99f \n" // Blend 75 / 25. 
".p2align 4 \n" + BUNDLEALIGN "75: \n" - "movdqa (%1),%%xmm1 \n" - "movdqa (%1,%4,1),%%xmm0 \n" + "movdqa "MEMACCESS(1)",%%xmm1 \n" + MEMOPREG(movdqa,0x00,1,4,1,xmm0) // movdqa (%1,%4,1),%%xmm0 "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqa %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 75b \n" "jmp 99f \n" // Blend 100 / 0 - Copy row unchanged. ".p2align 4 \n" + BUNDLEALIGN "100: \n" - "movdqa (%1),%%xmm0 \n" + "movdqa "MEMACCESS(1)",%%xmm0 \n" "sub $0x10,%2 \n" - "movdqa %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 100b \n" "99: \n" @@ -5095,6 +5104,9 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, "+r"(source_y_fraction) // %3 : "r"(static_cast<intptr_t>(src_stride)) // %4 : "memory", "cc" +#if defined(__native_client__) && defined(__x86_64__) + , "r14" +#endif #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" #endif @@ -5247,9 +5259,10 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, // General purpose row blend. ".p2align 4 \n" + BUNDLEALIGN "1: \n" - "movdqu (%1),%%xmm0 \n" - "movdqu (%1,%4,1),%%xmm2 \n" + "movdqu "MEMACCESS(1)",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2 "movdqu %%xmm0,%%xmm1 \n" "movdqu %%xmm2,%%xmm3 \n" "punpcklbw %%xmm4,%%xmm2 \n" @@ -5266,56 +5279,64 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, "paddw %%xmm3,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqu %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" "jmp 99f \n" // Blend 25 / 75. 
".p2align 4 \n" + BUNDLEALIGN "25: \n" - "movdqu (%1),%%xmm0 \n" - "movdqu (%1,%4,1),%%xmm1 \n" + "movdqu "MEMACCESS(1)",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqu %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 25b \n" "jmp 99f \n" // Blend 50 / 50. ".p2align 4 \n" + BUNDLEALIGN "50: \n" - "movdqu (%1),%%xmm0 \n" - "movdqu (%1,%4,1),%%xmm1 \n" + "movdqu "MEMACCESS(1)",%%xmm0 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqu %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 50b \n" "jmp 99f \n" // Blend 75 / 25. ".p2align 4 \n" + BUNDLEALIGN "75: \n" - "movdqu (%1),%%xmm1 \n" - "movdqu (%1,%4,1),%%xmm0 \n" + "movdqu "MEMACCESS(1)",%%xmm1 \n" + MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0 "pavgb %%xmm1,%%xmm0 \n" "pavgb %%xmm1,%%xmm0 \n" "sub $0x10,%2 \n" - "movdqu %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + BUNDLEALIGN + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 75b \n" "jmp 99f \n" // Blend 100 / 0 - Copy row unchanged. 
".p2align 4 \n" + BUNDLEALIGN "100: \n" - "movdqu (%1),%%xmm0 \n" + "movdqu "MEMACCESS(1)",%%xmm0 \n" "sub $0x10,%2 \n" - "movdqu %%xmm0,(%1,%0,1) \n" - "lea 0x10(%1),%1 \n" + MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) + "lea "MEMLEA(0x10,1)",%1 \n" "jg 100b \n" "99: \n" @@ -5325,6 +5346,9 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, "+r"(source_y_fraction) // %3 : "r"(static_cast<intptr_t>(src_stride)) // %4 : "memory", "cc" +#if defined(__native_client__) && defined(__x86_64__) + , "r14" +#endif #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" #endif