From 6f61eb67097b1602c957b5af87a4736e314d7e17 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 14 Aug 2013 21:43:44 +0000 Subject: [PATCH] NaCL port of Affine BUG=253 TESTED=ncval passed R=dingkai@google.com Review URL: https://webrtc-codereview.appspot.com/2035004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@761 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 2 +- include/libyuv/version.h | 2 +- source/row_posix.cc | 40 +++++++++++++++++++++++++++++++++------- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/README.chromium b/README.chromium index 044afc51a..7127bfeb1 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 760 +Version: 761 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 5c14fc33a..fecec16ad 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -40,6 +40,7 @@ extern "C" { (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) // Effects: #define HAS_ARGBADDROW_SSE2 +#define HAS_ARGBAFFINEROW_SSE2 #define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 @@ -128,7 +129,6 @@ extern "C" { #define HAS_YUY2TOYROW_SSE2 // Effects: -#define HAS_ARGBAFFINEROW_SSE2 #define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_INTERPOLATEROW_SSE2 #define HAS_SOBELROW_SSE2 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 9e4209192..b8f507fba 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 760 +#define LIBYUV_VERSION 761 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index f7749c96c..e381753c5 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -4748,8 +4748,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, intptr_t src_argb_stride_temp = src_argb_stride; intptr_t temp = 0; asm volatile ( - "movq (%3),%%xmm2 \n" - "movq 0x8(%3),%%xmm7 \n" + "movq "MEMACCESS(3)",%%xmm2 \n" + "movq "MEMACCESS2(0x08,3)",%%xmm7 \n" "shl $0x10,%1 \n" "add $0x4,%1 \n" "movd %1,%%xmm5 \n" @@ -4775,6 +4775,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, "packssdw %%xmm1,%%xmm0 \n" "pmaddwd %%xmm5,%%xmm0 \n" #if defined(__x86_64__) +// TODO(fbarchard): use a real movd to zero upper with %w1 for x64 and nacl. "movd %%xmm0,%1 \n" "mov %1,%5 \n" "and $0x0fffffff,%1 \n" @@ -4786,11 +4787,19 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, "movd %%xmm0,%5 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" #endif +#if defined(__x86_64__) && defined(__native_client__) + BUNDLEALIGN + "lea (%q0,%q1,1),%%r14d \n" + "movd (%%r15,%%r14,1),%%xmm1 \n" + "lea (%q0,%q5,1),%%r14d \n" + "movd (%%r15,%%r14,1),%%xmm6 \n" +#else "movd (%0,%1,1),%%xmm1 \n" "movd (%0,%5,1),%%xmm6 \n" +#endif "punpckldq %%xmm6,%%xmm1 \n" "addps %%xmm4,%%xmm2 \n" - "movq %%xmm1,(%2) \n" + "movq %%xmm1,"MEMACCESS(2)" \n" #if defined(__x86_64__) "movd %%xmm0,%1 \n" "mov %1,%5 \n" @@ -4801,13 +4810,21 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, "pshufd $0x39,%%xmm0,%%xmm0 \n" "movd %%xmm0,%5 \n" #endif +#if defined(__x86_64__) && defined(__native_client__) + BUNDLEALIGN + "lea (%q0,%q1,1),%%r14d \n" + "movd (%%r15,%%r14,1),%%xmm0 \n" + "lea (%q0,%q5,1),%%r14d \n" + "movd (%%r15,%%r14,1),%%xmm6 \n" +#else "movd (%0,%1,1),%%xmm0 \n" "movd (%0,%5,1),%%xmm6 \n" +#endif "punpckldq %%xmm6,%%xmm0 \n" "addps %%xmm4,%%xmm3 \n" "sub $0x4,%4 \n" - "movq %%xmm0,0x08(%2) \n" - "lea 0x10(%2),%2 \n" + "movq %%xmm0,"MEMACCESS2(0x08,2)" \n" + "lea "MEMLEA(0x10,2)",%2 \n" "jge 40b \n" "49: \n" @@ -4825,10 +4842,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, #if defined(__x86_64__) "and $0x0fffffff,%1 \n" #endif +#if defined(__x86_64__) && defined(__native_client__) + BUNDLEALIGN + "lea (%q0,%q1,1),%%r14d \n" + "movd (%%r15,%%r14,1),%%xmm0 \n" +#else "movd (%0,%1,1),%%xmm0 \n" +#endif "sub $0x1,%4 \n" - "movd %%xmm0,(%2) \n" - "lea 0x4(%2),%2 \n" + "movd %%xmm0,"MEMACCESS(2)" \n" + "lea "MEMLEA(0x04,2)",%2 \n" "jge 10b \n" "19: \n" : "+r"(src_argb), // %0 @@ -4839,6 +4862,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, "+r"(temp) // %5 : : "memory", "cc" +#if defined(__native_client__) && defined(__x86_64__) + , "r14" +#endif #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" #endif