From fa2618ee267642719ec51add88d9d60233cf9bfe Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Fri, 4 Dec 2015 11:19:41 -0800
Subject: [PATCH] Port BlendPlaneRow_SSSE3 to GCC

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:527

Review URL: https://codereview.chromium.org/1490273006 .
---
 include/libyuv/row.h |  1 +
 source/row_gcc.cc    | 48 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 40e53e15c..deed8a422 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -182,6 +182,7 @@ extern "C" {
 #define HAS_SOBELXROW_SSE2
 #define HAS_SOBELXYROW_SSE2
 #define HAS_SOBELYROW_SSE2
+#define HAS_BLENDPLANEROW_SSSE3
 #endif
 
 // The following are available on all x86 platforms, but
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 6b2bc6fa2..c3ff96282 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -3467,6 +3467,54 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
 }
 #endif // HAS_ARGBBLENDROW_SSSE3
 
+
+#ifdef HAS_BLENDPLANEROW_SSSE3
+// Blend 8 pixels at a time: dst = (src0 * a + src1 * (255 - a) + 255) >> 8.
+// Sources are biased by -128 so pmaddubsw can treat them as signed bytes.
+void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
+                         const uint8* alpha, uint8* dst, int width) {
+  asm volatile (
+    "pcmpeqb %%xmm5,%%xmm5 \n"  // xmm5 = all ones
+    "psllw $0x8,%%xmm5 \n"  // xmm5 = 0xff00 in every 16-bit word
+    "mov $0x80808080,%%eax \n"
+    "movd %%eax,%%xmm6 \n"
+    "pshufd $0x0,%%xmm6,%%xmm6 \n"  // xmm6 = 0x80 in every byte (the bias)
+    "mov $0x807f807f,%%eax \n"
+    "movd %%eax,%%xmm7 \n"
+    "pshufd $0x0,%%xmm7,%%xmm7 \n"  // xmm7 = 0x807f (32768 + 127) per word
+    "sub %2,%0 \n"  // src0 becomes an offset from alpha
+    "sub %2,%1 \n"  // src1 becomes an offset from alpha
+    "sub %2,%3 \n"  // dst becomes an offset from alpha
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1: \n"
+    "movq (%2),%%xmm0 \n"  // load 8 alpha bytes
+    "punpcklbw %%xmm0,%%xmm0 \n"  // duplicate each alpha: byte pairs (a, a)
+    "pxor %%xmm5,%%xmm0 \n"  // invert high byte: pairs (a, 255 - a)
+    "movq (%0,%2,1),%%xmm1 \n"  // load 8 src0 bytes
+    "movq (%1,%2,1),%%xmm2 \n"  // load 8 src1 bytes
+    "punpcklbw %%xmm2,%%xmm1 \n"  // interleave: pairs (src0, src1)
+    "psubb %%xmm6,%%xmm1 \n"  // bias to signed: (src0 - 128, src1 - 128)
+    "pmaddubsw %%xmm1,%%xmm0 \n"  // a*(src0-128) + (255-a)*(src1-128) per word
+    "paddw %%xmm7,%%xmm0 \n"  // + 32895 = 128*255 (undo bias) + 255 (rounding)
+    "psrlw $0x8,%%xmm0 \n"  // >> 8
+    "packuswb %%xmm0,%%xmm0 \n"  // narrow 8 words to 8 bytes
+    "movq %%xmm0,(%3,%2,1) \n"  // store 8 blended pixels
+    "lea 0x8(%2),%2 \n"  // advance alpha; offsets follow it, flags untouched
+    "subl $0x8,%4 \n"  // explicit 32-bit size: width may be a memory operand
+    "jg 1b \n"  // loop while width > 0
+  : "+r"(src0), // %0
+    "+r"(src1), // %1
+    "+r"(alpha), // %2
+    "+r"(dst), // %3
+    "+rm"(width) // %4 - may spill to memory; 5 regs + eax can exhaust i386
+  :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"
+  );
+}
+#endif // HAS_BLENDPLANEROW_SSSE3
+
+
 #ifdef HAS_ARGBATTENUATEROW_SSSE3
 // Shuffle table duplicating alpha
 static uvec8 kShuffleAlpha0 = {