mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Add ARGBExtractAlpha_AVX2 function
Port SSE2 version to AVX2. BUG=libyuv:572 TEST=/usr/local/google/home/fbarchard/intelsde/sde -skx -- out/Release/libyuv_unittest --gtest_filter=*Extract* R=wangcheng@google.com, magjed@chromium.org Review URL: https://codereview.chromium.org/2420553002 .
This commit is contained in:
parent
9fb3c31b06
commit
a5e93766a2
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1625
|
||||
Version: 1626
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -239,6 +239,15 @@ extern "C" {
|
||||
#define HAS_BLENDPLANEROW_AVX2
|
||||
#endif
|
||||
|
||||
|
||||
// The following are available with clang 3.4 or gcc 4.7 and newer.
|
||||
// TODO(fbarchard): Port to Visual C
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \
|
||||
!(defined(__clang__) && defined(_M_IX86) )
|
||||
#define HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
||||
// TODO(fbarchard): Port to gcc.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
@ -880,9 +889,12 @@ void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
|
||||
|
||||
void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
|
||||
void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
|
||||
void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width);
|
||||
void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
|
||||
void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
|
||||
int width);
|
||||
void ARGBExtractAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_a,
|
||||
int width);
|
||||
void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
|
||||
int width);
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1625
|
||||
#define LIBYUV_VERSION 1626
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -2702,6 +2702,12 @@ int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
|
||||
: ARGBExtractAlphaRow_Any_SSE2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
|
||||
: ARGBExtractAlphaRow_Any_AVX2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
|
||||
|
||||
@ -474,6 +474,9 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
|
||||
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||
// The last argument is the remainder mask (pixels per SIMD iteration minus
// one), matching the SSE2 (7) and NEON (15) entries. The AVX2 row consumes
// 32 pixels per loop, so the mask is 31, not 32.
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
|
||||
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
|
||||
@ -2860,6 +2860,47 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
|
||||
}
|
||||
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
|
||||
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||
// vpshufb control bytes used by ARGBExtractAlphaRow_AVX2 (broadcast to both
// 128-bit lanes there). For each 4-byte group of a lane, source byte 3
// (indices 3, 7, 11, 15) is moved into the group's lowest byte. The 128u
// entries have the high bit set, which makes vpshufb write zero to that byte.
// Result per dword: the selected byte zero-extended to 32 bits.
// NOTE(review): byte 3 is taken to be the alpha channel, per the function
// name; the pixel layout itself is not shown here.
static const uvec8 kShuffleAlphaShort_AVX2 = {
|
||||
3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u,
|
||||
11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u
|
||||
};
|
||||
|
||||
// Copies byte 3 of every 4-byte pixel in src_argb to dst_a.
//
// Each loop iteration reads 0x80 bytes (32 pixels) from src_argb, writes
// 0x20 bytes to dst_a, and subtracts 32 from width. The loop body always
// executes at least once and repeats while width stays positive, so a
// partial final group is not handled here: the caller must supply buffers
// that cover whole 32-pixel groups.
void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
|
||||
asm volatile (
|
||||
// ymm4 = dword permutation indices used by the final vpermd.
"vmovdqa %3,%%ymm4 \n"
|
||||
// ymm5 = 16-byte shuffle control replicated into both 128-bit lanes.
"vbroadcastf128 %4,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ", %%ymm0 \n"
|
||||
"vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n"
|
||||
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // each dword = its byte 3, zero-extended (same result as vpsrld $0x18)
|
||||
"vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
|
||||
"vmovdqu " MEMACCESS2(0x40, 0) ", %%ymm2 \n"
|
||||
"vmovdqu " MEMACCESS2(0x60, 0) ", %%ymm3 \n"
|
||||
"lea " MEMLEA(0x80, 0) ", %0 \n"
|
||||
// The pack instructions work within each 128-bit lane, so the packed
// results come out lane-interleaved ("mutated") rather than in source order.
"vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // dwords -> words; lane-interleaved order
|
||||
"vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
|
||||
"vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
|
||||
"vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // dwords -> words; lane-interleaved order
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // words -> bytes; still lane-interleaved
|
||||
// NOTE(review): kPermdARGBToY_AVX is defined elsewhere; presumably its
// indices put the eight 4-byte groups back in source order - confirm.
"vpermd %%ymm0,%%ymm4,%%ymm0 \n" // reorder dwords to undo the interleave
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x20, %2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+rm"(width) // %2
|
||||
: "m"(kPermdARGBToY_AVX), // %3
|
||||
"m"(kShuffleAlphaShort_AVX2) // %4
|
||||
: "memory", "cc"
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
|
||||
// width in pixels
|
||||
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user