From bb5ea8e4df7aba47d39a0b6e74ce7cc85e5c8c3a Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 15 Aug 2013 06:40:42 +0000 Subject: [PATCH] Unattenuate port to NaCL BUG=253 TESTED=validator R=nfullagar@google.com Review URL: https://webrtc-codereview.appspot.com/2038004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@763 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 4 ++-- include/libyuv/version.h | 2 +- source/row_common.cc | 2 +- source/row_posix.cc | 30 ++++++++++++++++-------------- source/row_win.cc | 6 +++--- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/README.chromium b/README.chromium index c3eff621b..8d46525ed 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 762 +Version: 763 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index b11f29055..cda2e5509 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -51,6 +51,7 @@ extern "C" { #define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSHADEROW_SSE2 #define HAS_ARGBSUBTRACTROW_SSE2 +#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #define HAS_INTERPOLATEROW_SSE2 @@ -130,7 +131,6 @@ extern "C" { #define HAS_YUY2TOYROW_SSE2 // Effects: -#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_SOBELROW_SSE2 #define HAS_SOBELXROW_SSSE3 #define HAS_SOBELXYROW_SSE2 @@ -1413,7 +1413,7 @@ void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, int width); // Inverse table for unattenuate, shared by C and SSE2. -extern uint32 fixed_invtbl8[256]; +extern const uint32 fixed_invtbl8[256]; void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 9b955c700..cdb6f1377 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 762 +#define LIBYUV_VERSION 763 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index 67ffc96b6..1de05a69c 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1688,7 +1688,7 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { // Reciprocal method is off by 1 on some values. ie 125 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. #define T(a) 0x01000000 + (0x10000 / a) -uint32 fixed_invtbl8[256] = { +const uint32 fixed_invtbl8[256] = { 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), diff --git a/source/row_posix.cc b/source/row_posix.cc index 190773113..ce45a5978 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -3920,35 +3920,34 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { uintptr_t alpha = 0; asm volatile ( - "sub %0,%1 \n" - // 4 pixel loop. ".p2align 4 \n" "1: \n" - "movdqa (%0),%%xmm0 \n" - "movzb 0x3(%0),%3 \n" + "movdqa "MEMACCESS(0)",%%xmm0 \n" + "movzb "MEMACCESS2(0x03,0)",%3 \n" "punpcklbw %%xmm0,%%xmm0 \n" - "movd 0x0(%4,%3,4),%%xmm2 \n" - "movzb 0x7(%0),%3 \n" - "movd 0x0(%4,%3,4),%%xmm3 \n" + MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 + "movzb "MEMACCESS2(0x07,0)",%3 \n" + MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 "pshuflw $0x40,%%xmm2,%%xmm2 \n" "pshuflw $0x40,%%xmm3,%%xmm3 \n" "movlhps %%xmm3,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm0 \n" - "movdqa (%0),%%xmm1 \n" - "movzb 0xb(%0),%3 \n" + "movdqa "MEMACCESS(0)",%%xmm1 \n" + "movzb "MEMACCESS2(0x0b,0)",%3 \n" "punpckhbw %%xmm1,%%xmm1 \n" - "movd 0x0(%4,%3,4),%%xmm2 \n" - "movzb 0xf(%0),%3 \n" - "movd 0x0(%4,%3,4),%%xmm3 \n" + MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 + "movzb "MEMACCESS2(0x0f,0)",%3 \n" + MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 "pshuflw $0x40,%%xmm2,%%xmm2 \n" "pshuflw $0x40,%%xmm3,%%xmm3 \n" "movlhps %%xmm3,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm1 \n" + "lea "MEMLEA(0x10,0)",%0 \n" "packuswb %%xmm1,%%xmm0 \n" "sub $0x4,%2 \n" - "movdqa %%xmm0,(%0,%1,1) \n" - "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,"MEMACCESS(1)" \n" + "lea "MEMLEA(0x10,1)",%1 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 @@ -3956,6 +3955,9 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, "+r"(alpha) // %3 : "r"(fixed_invtbl8) // %4 : "memory", "cc" +#if defined(__native_client__) && defined(__x86_64__) + , "r14" +#endif #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" #endif diff --git a/source/row_win.cc b/source/row_win.cc index fae39e090..df70c0c19 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -4765,7 +4765,6 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, mov eax, [esp + 8 + 4] // src_argb0 mov edx, [esp + 8 + 8] // dst_argb mov ecx, [esp + 8 + 12] // width - sub edx, eax align 16 convertloop: @@ -4790,11 +4789,12 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words movlhps xmm2, xmm3 pmulhuw xmm1, xmm2 // rgb * a + lea eax, [eax + 16] packuswb xmm0, xmm1 sub ecx, 4 - movdqa [eax + edx], xmm0 - lea eax, [eax + 16] + movdqa [edx], xmm0 + lea edx, [edx + 16] jg convertloop pop edi pop esi