mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Port Polynomial AVX2 code to GCC/NaCL
BUG=269 TESTED=untested R=johannkoenig@google.com Review URL: https://webrtc-codereview.appspot.com/2262004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@795 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
afd1d6b4ec
commit
3cb6071cb9
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 794
|
||||
Version: 795
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -90,7 +90,6 @@ extern "C" {
|
||||
#define HAS_YUY2TOYROW_SSE2
|
||||
#endif
|
||||
|
||||
|
||||
// The following are available on all x86 platforms except NaCL x64:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
@ -142,6 +141,16 @@ extern "C" {
|
||||
#define HAS_RGBCOLORTABLEROW_X86
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms, including NaCL, but
// require VS2012, llvm or NaCL.
// Caveat: llvm 3.1 required, but does not provide a version.
// The llvm and NaCL toolchains can also target non-x86 CPUs, so those two
// branches are additionally gated on an x86 architecture macro; the Visual C
// branch already requires _M_IX86.
#if !defined(LIBYUV_DISABLE_X86) && \
    ((defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700) || \
    ((defined(__x86_64__) || defined(__i386__)) && \
    (defined(__native_client__) || defined(__llvm__))))
// Effects:
#define HAS_ARGBPOLYNOMIALROW_AVX2
#endif
|
||||
|
||||
// The following are Windows only:
|
||||
// TODO(fbarchard): Port to gcc.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
@ -173,7 +182,6 @@ extern "C" {
|
||||
#define HAS_ARGBATTENUATEROW_AVX2
|
||||
#define HAS_ARGBMIRRORROW_AVX2
|
||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
||||
#define HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||
#endif // _MSC_VER >= 1700
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 794
|
||||
#define LIBYUV_VERSION 795
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -5875,6 +5875,52 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||
}
|
||||
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
|
||||
#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
void ARGBPolynomialRow_AVX2(const uint8* src_argb,
|
||||
uint8* dst_argb, const float* poly,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmovdqu "MEMACCESS(3)",%%xmm4 \n"
|
||||
"vmovdqu "MEMACCESS2(0x10,3)",%%xmm5 \n"
|
||||
"vmovdqu "MEMACCESS2(0x20,3)",%%xmm6 \n"
|
||||
"vmovdqu "MEMACCESS2(0x30,3)",%%xmm7 \n"
|
||||
"vpermq $0x44,%%ymm4,%%ymm4 \n"
|
||||
"vpermq $0x44,%%ymm5,%%ymm5 \n"
|
||||
"vpermq $0x44,%%ymm6,%%ymm6 \n"
|
||||
"vpermq $0x44,%%ymm7,%%ymm7 \n"
|
||||
|
||||
// 2 pixel loop.
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"vpmovzxbd "MEMACCESS(0)",%%ymm0 \n" // 2 ARGB pixels
|
||||
"lea "MEMLEA(0x8,0)",%0 \n"
|
||||
"vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats
|
||||
"vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X
|
||||
"vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X
|
||||
"vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X
|
||||
"vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X
|
||||
"vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X
|
||||
"vcvttps2dq %%ymm0,%%ymm0 \n"
|
||||
"vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
|
||||
"sub $0x2,%2 \n"
|
||||
"movq %%xmm0,"MEMACCESS(1)" \n"
|
||||
"lea "MEMLEA(0x8,1)",%1 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"(poly) // %3
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBCOLORTABLEROW_X86
|
||||
// Transform ARGB pixels with color table.
|
||||
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user