From 6398e1d4ebc50935a29c3532abcc1a73568fa4fa Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 11 Jul 2012 19:12:32 +0000 Subject: [PATCH] Port ARGBShade to Mac BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/679009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@299 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row.h | 2 +- source/row_posix.cc | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/README.chromium b/README.chromium index 5928823f8..57582730e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 298 +Version: 299 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 3f3f947bd..8aabbb247 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 298 +#define LIBYUV_VERSION 299 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row.h b/source/row.h index bc59694a6..9b720b6da 100644 --- a/source/row.h +++ b/source/row.h @@ -80,12 +80,12 @@ extern "C" { #define HAS_ARGBQUANTIZEROW_SSE2 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGE_SSE2 +#define HAS_ARGBSHADE_SSE2 #endif // The following are Windows only: #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #define HAS_ARGBCOLORTABLEROW_X86 -#define HAS_ARGBSHADE_SSE2 #endif // The following are disabled when SSSE3 is available: diff --git a/source/row_posix.cc b/source/row_posix.cc index aa24149ca..0041dcece 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -3178,7 +3178,44 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, ); } #endif // HAS_CUMULATIVESUMTOAVERAGE_SSE2 +#ifdef HAS_ARGBSHADE_SSE2 +// Shade 4 pixels at a time by 
specified value.
+// Aligned to 16 bytes: every load and store below is a movdqa on 16 bytes.
+void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value) {
+  asm volatile (
+    "movd %3,%%xmm2 \n"  // low dword of xmm2 = value
+    "sub %0,%1 \n"  // %1 = dst - src, so the store can address (%0,%1,1)
+    "punpcklbw %%xmm2,%%xmm2 \n"  // byte b of value -> word (b << 8) | b
+    "punpcklqdq %%xmm2,%%xmm2 \n"  // repeat those 4 words in the high half
+    // 4 pixel loop: byte i of each pixel is scaled by byte i of value.
+    ".p2align 2 \n"  // align the loop label to a 4 byte boundary
+  "1: \n"
+    "movdqa (%0),%%xmm0 \n"  // load 16 source bytes
+    "movdqa %%xmm0,%%xmm1 \n"  // second copy for the upper 8 bytes
+    "punpcklbw %%xmm0,%%xmm0 \n"  // low 8 bytes -> words (b << 8) | b
+    "punpckhbw %%xmm1,%%xmm1 \n"  // high 8 bytes -> words (b << 8) | b
+    "pmulhuw %%xmm2,%%xmm0 \n"  // keep high 16 bits of unsigned product
+    "pmulhuw %%xmm2,%%xmm1 \n"
+    "psrlw $0x8,%%xmm0 \n"  // keep the top byte of each 16 bit result
+    "psrlw $0x8,%%xmm1 \n"
+    "packuswb %%xmm1,%%xmm0 \n"  // narrow the 16 words back to 16 bytes
+    "sub $0x4,%2 \n"  // width -= 4; these flags are what jg tests below
+    "movdqa %%xmm0,(%0,%1,1) \n"  // store at src + (dst - src), i.e. dst
+    "lea 0x10(%0),%0 \n"  // src += 16; lea leaves the flags untouched
+    "jg 1b \n"  // repeat while width > 0; the body always runs at least once
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(width)      // %2
+  : "r"(value)       // %3
+  : "memory", "cc"
+#if defined(__SSE2__)  // xmm clobbers are listed only when __SSE2__ is defined
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_ARGBSHADE_SSE2
 #endif  // defined(__x86_64__) || defined(__i386__)