mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
ARGBPolynomial ported to gcc
BUG=265 TESTED=try bots R=changjun.yang@intel.com Review URL: https://webrtc-codereview.appspot.com/2191007 git-svn-id: http://libyuv.googlecode.com/svn/trunk@786 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
7e7c7753ba
commit
c3b04796c2
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 785
|
||||
Version: 786
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -98,6 +98,7 @@ extern "C" {
|
||||
// Conversions:
|
||||
#define HAS_ARGB1555TOARGBROW_SSE2
|
||||
#define HAS_ARGB4444TOARGBROW_SSE2
|
||||
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
#define HAS_ARGBTOARGB1555ROW_SSE2
|
||||
#define HAS_ARGBTOARGB4444ROW_SSE2
|
||||
#define HAS_ARGBTORAWROW_SSSE3
|
||||
@ -142,7 +143,6 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
// Effects:
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
#define HAS_RGBCOLORTABLEROW_X86
|
||||
#define HAS_ARGBLUMACOLORTABLEROW_SSE2
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 785
|
||||
#define LIBYUV_VERSION 786
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -2075,13 +2075,22 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
|
||||
dst_argb[3] = src_argb[3];
|
||||
const uint8* luma1 = RGBToYJx256(src_argb[6], src_argb[5], src_argb[4]) +
|
||||
luma;
|
||||
dst_argb[4] = luma0[src_argb[4]];
|
||||
dst_argb[5] = luma0[src_argb[5]];
|
||||
dst_argb[6] = luma0[src_argb[6]];
|
||||
dst_argb[4] = luma1[src_argb[4]];
|
||||
dst_argb[5] = luma1[src_argb[5]];
|
||||
dst_argb[6] = luma1[src_argb[6]];
|
||||
dst_argb[7] = src_argb[7];
|
||||
src_argb += 8;
|
||||
dst_argb += 8;
|
||||
}
|
||||
if (width & 1) {
|
||||
// Luminance in rows, color values in columns.
|
||||
const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) +
|
||||
luma;
|
||||
dst_argb[0] = luma0[src_argb[0]];
|
||||
dst_argb[1] = luma0[src_argb[1]];
|
||||
dst_argb[2] = luma0[src_argb[2]];
|
||||
dst_argb[3] = src_argb[3];
|
||||
}
|
||||
}
|
||||
|
||||
#undef clamp0
|
||||
|
||||
@ -5816,6 +5816,65 @@ int FixedDiv_X86(int num, int div) {
|
||||
return num;
|
||||
}
|
||||
#endif // HAS_FIXEDDIV_X86
|
||||
|
||||
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
// Applies a cubic polynomial to every byte of a row of ARGB pixels using
// SSE2 (GCC inline assembly).
//
// For each source byte X (converted to float) the code computes
//   C0 + C1 * X + C2 * X^2 + C3 * X^3
// where C0..C3 are the four 4-float vectors read from poly at byte offsets
// 0x00, 0x10, 0x20 and 0x30. Lane i of each vector is applied to byte i of
// the pixel. The float result is truncated to int32 and then narrowed to
// bytes with two packuswb steps (unsigned saturation of signed 16-bit words).
//
//   src_argb  source pixels; 8 bytes are read per loop pass.
//   dst_argb  destination pixels; 8 bytes are written per loop pass.
//   poly      16 floats of coefficients, read as four 16-byte memory operands.
//             NOTE(review): SSE mulps/addps memory operands need 16-byte
//             alignment, so poly is presumably 16-byte aligned - confirm
//             against callers.
//   width     pixel count. The loop body always runs at least once and
//             consumes two pixels per pass, so callers presumably pass a
//             positive, even width - TODO confirm.
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb, const float* poly,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"pxor %%xmm3,%%xmm3 \n"  // xmm3 = 0, used below to zero-extend bytes.
|
||||
|
||||
// 2 pixel loop.
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movq (%0),%%xmm0 \n"  // Load 8 source bytes (2 pixels).
|
||||
"lea 0x8(%0),%0 \n"  // src_argb += 8.
|
||||
"punpcklbw %%xmm3,%%xmm0 \n"  // 8 bytes -> 8 zero-extended 16-bit words.
|
||||
"movdqa %%xmm0,%%xmm4 \n"  // Copy so the high 4 words can be split off.
|
||||
"punpcklwd %%xmm3,%%xmm0 \n"  // Low 4 words -> 4 int32 (first pixel).
|
||||
"punpckhwd %%xmm3,%%xmm4 \n"  // High 4 words -> 4 int32 (second pixel).
|
||||
"cvtdq2ps %%xmm0,%%xmm0 \n"  // int32 -> float, first pixel (X).
|
||||
"cvtdq2ps %%xmm4,%%xmm4 \n"  // int32 -> float, second pixel (X).
|
||||
"movdqa %%xmm0,%%xmm1 \n"  // xmm1 = X (first pixel), kept for powers.
|
||||
"movdqa %%xmm4,%%xmm5 \n"  // xmm5 = X (second pixel), kept for powers.
|
||||
"mulps 0x10(%3),%%xmm0 \n"  // xmm0 = C1 * X.
|
||||
"mulps 0x10(%3),%%xmm4 \n"  // xmm4 = C1 * X.
|
||||
"addps (%3),%%xmm0 \n"  // xmm0 = C0 + C1 * X.
|
||||
"addps (%3),%%xmm4 \n"  // xmm4 = C0 + C1 * X.
|
||||
"movdqa %%xmm1,%%xmm2 \n"  // xmm2 = X.
|
||||
"movdqa %%xmm5,%%xmm6 \n"  // xmm6 = X.
|
||||
"mulps %%xmm1,%%xmm2 \n"  // xmm2 = X * X.
|
||||
"mulps %%xmm5,%%xmm6 \n"  // xmm6 = X * X.
|
||||
"mulps %%xmm2,%%xmm1 \n"  // xmm1 = X * X * X.
|
||||
"mulps %%xmm6,%%xmm5 \n"  // xmm5 = X * X * X.
|
||||
"mulps 0x20(%3),%%xmm2 \n"  // xmm2 = C2 * X^2.
|
||||
"mulps 0x20(%3),%%xmm6 \n"  // xmm6 = C2 * X^2.
|
||||
"mulps 0x30(%3),%%xmm1 \n"  // xmm1 = C3 * X^3.
|
||||
"mulps 0x30(%3),%%xmm5 \n"  // xmm5 = C3 * X^3.
|
||||
"addps %%xmm2,%%xmm0 \n"  // Accumulate the quadratic term.
|
||||
"addps %%xmm6,%%xmm4 \n"  // Accumulate the quadratic term.
|
||||
"addps %%xmm1,%%xmm0 \n"  // Accumulate the cubic term.
|
||||
"addps %%xmm5,%%xmm4 \n"  // Accumulate the cubic term.
|
||||
"cvttps2dq %%xmm0,%%xmm0 \n"  // float -> int32 with truncation.
|
||||
"cvttps2dq %%xmm4,%%xmm4 \n"  // float -> int32 with truncation.
|
||||
"packuswb %%xmm4,%%xmm0 \n"  // First narrowing pack of both pixels.
|
||||
"packuswb %%xmm0,%%xmm0 \n"  // Second pack: 8 result bytes in low half.
|
||||
"sub $0x2,%2 \n"  // width -= 2; sets the flags tested by jg.
|
||||
"movq %%xmm0,(%1) \n"  // Store 8 destination bytes (2 pixels).
|
||||
"lea 0x8(%1),%1 \n"  // dst_argb += 8 (lea leaves flags untouched).
|
||||
"jg 1b \n"  // Loop while the remaining width is > 0.
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"(poly) // %3
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -6781,6 +6781,7 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||
mov ecx, [esp + 4 + 16] /* width */
|
||||
pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
|
||||
|
||||
// 2 pixel loop.
|
||||
align 16
|
||||
convertloop:
|
||||
// (slow) pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
|
||||
@ -6846,6 +6847,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
|
||||
mov edx, [esp + 8] /* dst_argb */
|
||||
mov ecx, [esp + 16] /* width */
|
||||
|
||||
// 2 pixel loop.
|
||||
align 16
|
||||
convertloop:
|
||||
vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user