mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Port the ARGBExtractAlphaRow_AVX2 function to Windows.
BUG=libyuv:572 TEST=try bots R=wangcheng@google.com, magjed@chromium.org Review URL: https://codereview.chromium.org/2416783004 .
This commit is contained in:
parent
a5e93766a2
commit
5333e94e70
@ -83,6 +83,7 @@ extern "C" {
|
|||||||
#define HAS_ABGRTOYROW_SSSE3
|
#define HAS_ABGRTOYROW_SSSE3
|
||||||
#define HAS_ARGB1555TOARGBROW_SSE2
|
#define HAS_ARGB1555TOARGBROW_SSE2
|
||||||
#define HAS_ARGB4444TOARGBROW_SSE2
|
#define HAS_ARGB4444TOARGBROW_SSE2
|
||||||
|
#define HAS_ARGBEXTRACTALPHAROW_SSE2
|
||||||
#define HAS_ARGBSETROW_X86
|
#define HAS_ARGBSETROW_X86
|
||||||
#define HAS_ARGBSHUFFLEROW_SSE2
|
#define HAS_ARGBSHUFFLEROW_SSE2
|
||||||
#define HAS_ARGBSHUFFLEROW_SSSE3
|
#define HAS_ARGBSHUFFLEROW_SSSE3
|
||||||
@ -97,12 +98,12 @@ extern "C" {
|
|||||||
#define HAS_ARGBTOUVROW_SSSE3
|
#define HAS_ARGBTOUVROW_SSSE3
|
||||||
#define HAS_ARGBTOYJROW_SSSE3
|
#define HAS_ARGBTOYJROW_SSSE3
|
||||||
#define HAS_ARGBTOYROW_SSSE3
|
#define HAS_ARGBTOYROW_SSSE3
|
||||||
#define HAS_ARGBEXTRACTALPHAROW_SSE2
|
|
||||||
#define HAS_BGRATOUVROW_SSSE3
|
#define HAS_BGRATOUVROW_SSSE3
|
||||||
#define HAS_BGRATOYROW_SSSE3
|
#define HAS_BGRATOYROW_SSSE3
|
||||||
#define HAS_COPYROW_ERMS
|
#define HAS_COPYROW_ERMS
|
||||||
#define HAS_COPYROW_SSE2
|
#define HAS_COPYROW_SSE2
|
||||||
#define HAS_H422TOARGBROW_SSSE3
|
#define HAS_H422TOARGBROW_SSSE3
|
||||||
|
#define HAS_HALFFLOATROW_SSE2
|
||||||
#define HAS_I400TOARGBROW_SSE2
|
#define HAS_I400TOARGBROW_SSE2
|
||||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||||
@ -140,7 +141,6 @@ extern "C" {
|
|||||||
#define HAS_YUY2TOUV422ROW_SSE2
|
#define HAS_YUY2TOUV422ROW_SSE2
|
||||||
#define HAS_YUY2TOUVROW_SSE2
|
#define HAS_YUY2TOUVROW_SSE2
|
||||||
#define HAS_YUY2TOYROW_SSE2
|
#define HAS_YUY2TOYROW_SSE2
|
||||||
#define HAS_HALFFLOATROW_SSE2
|
|
||||||
|
|
||||||
// Effects:
|
// Effects:
|
||||||
#define HAS_ARGBADDROW_SSE2
|
#define HAS_ARGBADDROW_SSE2
|
||||||
@ -189,6 +189,7 @@ extern "C" {
|
|||||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||||
#define HAS_ARGBCOPYALPHAROW_AVX2
|
#define HAS_ARGBCOPYALPHAROW_AVX2
|
||||||
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
|
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
|
||||||
|
#define HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||||
#define HAS_ARGBMIRRORROW_AVX2
|
#define HAS_ARGBMIRRORROW_AVX2
|
||||||
#define HAS_ARGBPOLYNOMIALROW_AVX2
|
#define HAS_ARGBPOLYNOMIALROW_AVX2
|
||||||
#define HAS_ARGBSHUFFLEROW_AVX2
|
#define HAS_ARGBSHUFFLEROW_AVX2
|
||||||
@ -199,12 +200,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBTOYROW_AVX2
|
#define HAS_ARGBTOYROW_AVX2
|
||||||
#define HAS_COPYROW_AVX
|
#define HAS_COPYROW_AVX
|
||||||
#define HAS_H422TOARGBROW_AVX2
|
#define HAS_H422TOARGBROW_AVX2
|
||||||
|
#define HAS_HALFFLOATROW_AVX2
|
||||||
#define HAS_I400TOARGBROW_AVX2
|
#define HAS_I400TOARGBROW_AVX2
|
||||||
#if !(defined(_DEBUG) && defined(__i386__))
|
|
||||||
// TODO(fbarchard): fix build error on android_full_debug=1
|
|
||||||
// https://code.google.com/p/libyuv/issues/detail?id=517
|
|
||||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
|
||||||
#endif
|
|
||||||
#define HAS_I422TOARGB1555ROW_AVX2
|
#define HAS_I422TOARGB1555ROW_AVX2
|
||||||
#define HAS_I422TOARGB4444ROW_AVX2
|
#define HAS_I422TOARGB4444ROW_AVX2
|
||||||
#define HAS_I422TOARGBROW_AVX2
|
#define HAS_I422TOARGBROW_AVX2
|
||||||
@ -228,7 +225,6 @@ extern "C" {
|
|||||||
#define HAS_YUY2TOUV422ROW_AVX2
|
#define HAS_YUY2TOUV422ROW_AVX2
|
||||||
#define HAS_YUY2TOUVROW_AVX2
|
#define HAS_YUY2TOUVROW_AVX2
|
||||||
#define HAS_YUY2TOYROW_AVX2
|
#define HAS_YUY2TOYROW_AVX2
|
||||||
#define HAS_HALFFLOATROW_AVX2
|
|
||||||
|
|
||||||
// Effects:
|
// Effects:
|
||||||
#define HAS_ARGBADDROW_AVX2
|
#define HAS_ARGBADDROW_AVX2
|
||||||
@ -237,15 +233,12 @@ extern "C" {
|
|||||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||||
#define HAS_BLENDPLANEROW_AVX2
|
#define HAS_BLENDPLANEROW_AVX2
|
||||||
|
|
||||||
|
#if !(defined(_DEBUG) && defined(__i386__))
|
||||||
|
// TODO(fbarchard): fix build error on android_full_debug=1
|
||||||
|
// https://code.google.com/p/libyuv/issues/detail?id=517
|
||||||
|
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// The following are available clang 3.4 or gcc 4.7.
|
|
||||||
// TODO(fbarchard): Port to Visual C
|
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
|
||||||
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \
|
|
||||||
!(defined(__clang__) && defined(_M_IX86) )
|
|
||||||
#define HAS_ARGBEXTRACTALPHAROW_AVX2
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
||||||
@ -279,6 +272,7 @@ extern "C" {
|
|||||||
#define HAS_ARGB4444TOARGBROW_NEON
|
#define HAS_ARGB4444TOARGBROW_NEON
|
||||||
#define HAS_ARGB4444TOUVROW_NEON
|
#define HAS_ARGB4444TOUVROW_NEON
|
||||||
#define HAS_ARGB4444TOYROW_NEON
|
#define HAS_ARGB4444TOYROW_NEON
|
||||||
|
#define HAS_ARGBEXTRACTALPHAROW_NEON
|
||||||
#define HAS_ARGBSETROW_NEON
|
#define HAS_ARGBSETROW_NEON
|
||||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||||
@ -291,7 +285,6 @@ extern "C" {
|
|||||||
#define HAS_ARGBTOUVROW_NEON
|
#define HAS_ARGBTOUVROW_NEON
|
||||||
#define HAS_ARGBTOYJROW_NEON
|
#define HAS_ARGBTOYJROW_NEON
|
||||||
#define HAS_ARGBTOYROW_NEON
|
#define HAS_ARGBTOYROW_NEON
|
||||||
#define HAS_ARGBEXTRACTALPHAROW_NEON
|
|
||||||
#define HAS_BGRATOUVROW_NEON
|
#define HAS_BGRATOUVROW_NEON
|
||||||
#define HAS_BGRATOYROW_NEON
|
#define HAS_BGRATOYROW_NEON
|
||||||
#define HAS_COPYROW_NEON
|
#define HAS_COPYROW_NEON
|
||||||
@ -371,15 +364,15 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
#define HAS_MIRRORROW_MSA
|
|
||||||
#define HAS_ARGBMIRRORROW_MSA
|
#define HAS_ARGBMIRRORROW_MSA
|
||||||
#define HAS_I422TOYUY2ROW_MSA
|
|
||||||
#define HAS_I422TOUYVYROW_MSA
|
#define HAS_I422TOUYVYROW_MSA
|
||||||
#define HAS_YUY2TOYROW_MSA
|
#define HAS_I422TOYUY2ROW_MSA
|
||||||
#define HAS_YUY2TOUVROW_MSA
|
#define HAS_MIRRORROW_MSA
|
||||||
#define HAS_YUY2TOUV422ROW_MSA
|
|
||||||
#define HAS_UYVYTOYROW_MSA
|
|
||||||
#define HAS_UYVYTOUVROW_MSA
|
#define HAS_UYVYTOUVROW_MSA
|
||||||
|
#define HAS_UYVYTOYROW_MSA
|
||||||
|
#define HAS_YUY2TOUV422ROW_MSA
|
||||||
|
#define HAS_YUY2TOUVROW_MSA
|
||||||
|
#define HAS_YUY2TOYROW_MSA
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -3445,6 +3445,41 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
|
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||||
|
// width in pixels
|
||||||
|
__declspec(naked)
|
||||||
|
void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
|
||||||
|
__asm {
|
||||||
|
mov eax, [esp + 4] // src_argb
|
||||||
|
mov edx, [esp + 8] // dst_a
|
||||||
|
mov ecx, [esp + 12] // width
|
||||||
|
vmovdqa ymm4, ymmword ptr kPermdARGBToY_AVX
|
||||||
|
|
||||||
|
extractloop:
|
||||||
|
vmovdqu ymm0, [eax]
|
||||||
|
vmovdqu ymm1, [eax + 32]
|
||||||
|
vpsrld ymm0, ymm0, 24
|
||||||
|
vpsrld ymm1, ymm1, 24
|
||||||
|
vmovdqu ymm2, [eax + 64]
|
||||||
|
vmovdqu ymm3, [eax + 96]
|
||||||
|
lea eax, [eax + 128]
|
||||||
|
vpackssdw ymm0, ymm0, ymm1 // mutates
|
||||||
|
vpsrld ymm2, ymm2, 24
|
||||||
|
vpsrld ymm3, ymm3, 24
|
||||||
|
vpackssdw ymm2, ymm2, ymm3 // mutates
|
||||||
|
vpackuswb ymm0, ymm0, ymm2 // mutates
|
||||||
|
vpermd ymm0, ymm4, ymm0 // unmutate
|
||||||
|
vmovdqu [edx], ymm0
|
||||||
|
lea edx, [edx + 32]
|
||||||
|
sub ecx, 32
|
||||||
|
jg extractloop
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
|
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
|
||||||
// width in pixels
|
// width in pixels
|
||||||
__declspec(naked)
|
__declspec(naked)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user