From f8c334473b864be6785ec57d94553936380ec207 Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com"
Date: Wed, 12 Nov 2014 18:38:06 +0000
Subject: [PATCH] ARGBAttenuate_AVX2 ported to GCC. BUG=269 TESTED=try bots R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/29049004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1162 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium          |  2 +-
 include/libyuv/row.h     |  2 +-
 include/libyuv/version.h |  2 +-
 source/row_posix.cc      | 46 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/README.chromium b/README.chromium
index 9393cd6a1..5c5bb7a1e 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1161
+Version: 1162
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 95083511e..9435ddf76 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -204,6 +204,7 @@ extern "C" {
 #define HAS_ARGBADDROW_AVX2
 #define HAS_ARGBSUBTRACTROW_AVX2
 #define HAS_ARGBMULTIPLYROW_AVX2
+#define HAS_ARGBATTENUATEROW_AVX2
 #endif
 
 // The following are require VS2012.
@@ -219,7 +220,6 @@ extern "C" {
 #define HAS_MIRRORROW_AVX2
 
 // Effects:
-#define HAS_ARGBATTENUATEROW_AVX2
 #define HAS_ARGBMIRRORROW_AVX2
 #define HAS_ARGBUNATTENUATEROW_AVX2
 #endif // defined(VISUALC_HAS_AVX2)
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 80b81afa0..08e24ed45 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1161
+#define LIBYUV_VERSION 1162
 
 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/source/row_posix.cc b/source/row_posix.cc
index f71752665..2f272bc61 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -3549,6 +3549,52 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
 }
 #endif // HAS_ARGBATTENUATEROW_SSSE3
 
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+// Shuffle table duplicating alpha (vpshufb control, broadcast to both lanes).
+static const ulvec8 kShuffleAlpha_AVX2 = {
+  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,  // 1st pixel: alpha word x3; 128 -> 0
+  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u  // 2nd pixel: same pattern
+};
+// Attenuate 8 pixels at a time: scales each color byte by the pixel's alpha.
+void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    "vbroadcastf128 %3,%%ymm4 \n"  // ymm4 = shuffle table in both lanes
+    "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"  // ymm5 = all ones
+    "vpslld $0x18,%%ymm5,%%ymm5 \n"  // ymm5 = 0xff000000 per pixel (alpha mask)
+    "sub %0,%1 \n"  // %1 = dst - src, so (%0,%1) addresses dst
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1: \n"
+    "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"  // ymm6 = 8 source pixels
+    "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"  // low 2 pixels/lane, bytes -> words
+    "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"  // high 2 pixels/lane, bytes -> words
+    "vpshufb %%ymm4,%%ymm0,%%ymm2 \n"  // ymm2 = alpha over color words, 0 in alpha
+    "vpshufb %%ymm4,%%ymm1,%%ymm3 \n"  // ymm3 = same for the high pixels
+    "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"  // high 16 bits of color * alpha
+    "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+    "vpand %%ymm5,%%ymm6,%%ymm6 \n"  // ymm6 = original alpha bytes only
+    "vpsrlw $0x8,%%ymm0,%%ymm0 \n"  // keep the top byte of each product
+    "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+    "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"  // words -> bytes, pixel order kept
+    "vpor %%ymm6,%%ymm0,%%ymm0 \n"  // merge the original alpha back in
+    "sub $0x8,%2 \n"  // width -= 8; sets the flags tested by jg
+    MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
+    "lea " MEMLEA(0x20,0) ",%0 \n"  // src += 32 bytes; lea leaves flags intact
+    "jg 1b \n"  // loop while width > 0
+    "vzeroupper \n"  // zero upper ymm halves (avoids AVX-SSE transition penalty)
+  : "+r"(src_argb), // %0
+    "+r"(dst_argb), // %1
+    "+r"(width) // %2
+  : "m"(kShuffleAlpha_AVX2) // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+#endif // HAS_ARGBATTENUATEROW_AVX2
+
 #ifdef HAS_ARGBUNATTENUATEROW_SSE2
 // Unattenuate 4 pixels at a time.
 // aligned to 16 bytes