mirror of https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
cleanup ScaleRowDown8Int_SSE2 and other simple gcc versions

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/581004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@265 16f28f9a-4ce2-e073-06de-1de4eb20be90
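The substance of the cleanup: the GCC inline-assembly row functions now declare the xmm registers they overwrite as clobbers, guarded by #if defined(__SSE2__), since builds without SSE enabled may not accept xmm register names in a clobber list. A minimal sketch of that pattern follows; the function is illustrative only and is not code from this patch:

#include <stdint.h>

// Zero-fill a row 16 bytes at a time; assumes width is a positive
// multiple of 16. Hypothetical helper shown only to illustrate the
// clobber-list pattern; requires GCC/Clang on an x86 target.
static void ClearRow_SSE2(uint8_t* dst_ptr, int width) {
  asm volatile (
    "pxor      %%xmm0,%%xmm0                   \n"
  "1:                                          \n"
    "movdqu    %%xmm0,(%0)                     \n"
    "lea       0x10(%0),%0                     \n"
    "sub       $0x10,%1                        \n"
    "jg        1b                              \n"
  : "+r"(dst_ptr),   // %0: compiler-chosen pointer register
    "+r"(width)      // %1
  :
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0"         // declare the SSE register this asm overwrites
#endif
  );
}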
parent 68caaed871
commit f368565b95
source/scale.cc (288 changed lines)
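The other recurring change, sketched here with illustrative names before the diff itself: the deleted x86-64-only ScaleRowDown8Int_SSE2 pinned scratch addresses in %%r10/%%r11 (and therefore clobbered "r10", "r11" and could not build for 32-bit), while the replacement passes zero-initialized locals such as stridex3 and row4 as "+r" operands, letting the compiler assign scratch registers on either architecture:

#include <stddef.h>
#include <stdint.h>

// Average four consecutive rows into dst (first 16 bytes only).
// Hypothetical example of the "+r" scratch-operand pattern; the
// compiler picks the register that holds stride * 3.
static void AverageFourRows(const uint8_t* src, ptrdiff_t stride,
                            uint8_t* dst) {
  intptr_t stridex3 = 0;  // scratch; receives stride * 3 inside the asm
  asm volatile (
    "lea       (%3,%3,2),%0                    \n"  // %0 = stride * 3
    "movdqu    (%1),%%xmm0                     \n"  // row 0
    "movdqu    (%1,%3,1),%%xmm1                \n"  // row 1
    "movdqu    (%1,%3,2),%%xmm2                \n"  // row 2
    "movdqu    (%1,%0,1),%%xmm3                \n"  // row 3, via scratch
    "pavgb     %%xmm1,%%xmm0                   \n"
    "pavgb     %%xmm3,%%xmm2                   \n"
    "pavgb     %%xmm2,%%xmm0                   \n"
    "movdqu    %%xmm0,(%2)                     \n"
  : "+r"(stridex3)   // %0: scratch register (was hard-coded %%r10)
  : "r"(src),        // %1
    "r"(dst),        // %2
    "r"(stride)      // %3
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3"
#endif
  );
}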
@@ -1582,54 +1582,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   }
 }
 
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                                    int dst_width) {
-  __asm {
-    mov        edx, [esp + 4]   // dst_ptr
-    mov        eax, [esp + 8]   // src_ptr
-    mov        ecx, [esp + 12]  // dst_width
-    movdqa     xmm1, _round34
-    movdqa     xmm2, _shuf01
-    movdqa     xmm3, _shuf11
-    movdqa     xmm4, _shuf21
-    movdqa     xmm5, _madd01
-    movdqa     xmm6, _madd11
-    movdqa     xmm7, _madd21
-
-    align      16
-  wloop:
-    movdqa     xmm0, [eax]       // pixels 0..7
-    pshufb     xmm0, xmm2
-    pmaddubsw  xmm0, xmm5
-    paddsw     xmm0, xmm1
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    movq       qword ptr [edx], xmm0
-    movdqu     xmm0, [eax + 8]   // pixels 8..15
-    pshufb     xmm0, xmm3
-    pmaddubsw  xmm0, xmm6
-    paddsw     xmm0, xmm1
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    movq       qword ptr [edx + 8], xmm0
-    movdqa     xmm0, [eax + 16]  // pixels 16..23
-    lea        eax, [eax + 32]
-    pshufb     xmm0, xmm4
-    pmaddubsw  xmm0, xmm7
-    paddsw     xmm0, xmm1
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    sub        ecx, 24
-    movq       qword ptr [edx + 16], xmm0
-    lea        edx, [edx + 24]
-    jg         wloop
-    ret
-  }
-}
-
 #elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
 
 // GCC versions of row functions are verbatim conversions from Visual C.
@@ -1642,7 +1594,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,
     "pcmpeqb   %%xmm5,%%xmm5                   \n"
     "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
@@ -1658,7 +1610,10 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,
     "+r"(dst_width)   // %2
   :
   : "memory", "cc"
-  );
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
 }
 
 void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
@@ -1667,7 +1622,7 @@ void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
     "pcmpeqb   %%xmm5,%%xmm5                   \n"
     "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "movdqa    (%0,%3,1),%%xmm2                \n"
@@ -1693,16 +1648,18 @@ void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
     "+r"(dst_width)   // %2
   : "r"(static_cast<intptr_t>(src_stride))   // %3
   : "memory", "cc"
-  );
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
 }
 
 static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, int src_stride,
                                          uint8* dst_ptr, int dst_width) {
   asm volatile (
     "pcmpeqb   %%xmm5,%%xmm5                   \n"
     "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqu    (%0),%%xmm0                     \n"
     "movdqu    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
@@ -1718,7 +1675,10 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, int src_stride,
     "+r"(dst_width)   // %2
   :
   : "memory", "cc"
-  );
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
 }
 
 static void ScaleRowDown2Int_Unaligned_SSE2(const uint8* src_ptr,
@@ -1728,7 +1688,7 @@ static void ScaleRowDown2Int_Unaligned_SSE2(const uint8* src_ptr,
     "pcmpeqb   %%xmm5,%%xmm5                   \n"
     "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqu    (%0),%%xmm0                     \n"
     "movdqu    0x10(%0),%%xmm1                 \n"
     "movdqu    (%0,%3,1),%%xmm2                \n"
@@ -1754,7 +1714,10 @@ static void ScaleRowDown2Int_Unaligned_SSE2(const uint8* src_ptr,
     "+r"(dst_width)   // %2
   : "r"(static_cast<intptr_t>(src_stride))   // %3
   : "memory", "cc"
-  );
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
 }
 
 #define HAS_SCALEROWDOWN4_SSE2
@@ -1764,7 +1727,7 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
     "pcmpeqb   %%xmm5,%%xmm5                   \n"
     "psrld     $0x18,%%xmm5                    \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
@@ -1781,18 +1744,21 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
     "+r"(dst_width)   // %2
   :
   : "memory", "cc"
-  );
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
 }
 
 static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
                                   uint8* dst_ptr, int dst_width) {
-  intptr_t temp = 0;
+  intptr_t stridex3 = 0;
   asm volatile (
     "pcmpeqb   %%xmm7,%%xmm7                   \n"
     "psrlw     $0x8,%%xmm7                     \n"
     "lea       (%4,%4,2),%3                    \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "movdqa    (%0,%4,1),%%xmm2                \n"
@@ -1829,13 +1795,13 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
   : "+r"(src_ptr),    // %0
     "+r"(dst_ptr),    // %1
     "+r"(dst_width),  // %2
-    "+r"(temp)        // %3
+    "+r"(stridex3)    // %3
   : "r"(static_cast<intptr_t>(src_stride))   // %4
   : "memory", "cc"
-#if defined(__x86_64__)
-    , "xmm6", "xmm7"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
 #endif
   );
 }
 
 #define HAS_SCALEROWDOWN8_SSE2
@@ -1869,6 +1835,72 @@ static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride,
   );
 }
 
+static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
+                                  uint8* dst_ptr, int dst_width) {
+  intptr_t stridex3 = 0;
+  intptr_t row4 = 0;
+  asm volatile (
+    "lea       (%5,%5,2),%3                    \n"
+    "pxor      %%xmm7,%%xmm7                   \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    "movdqa    (%0),%%xmm0                     \n"
+    "movdqa    0x10(%0),%%xmm1                 \n"
+    "movdqa    (%0,%5,1),%%xmm2                \n"
+    "movdqa    0x10(%0,%5,1),%%xmm3            \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    (%0,%5,2),%%xmm2                \n"
+    "movdqa    0x10(%0,%5,2),%%xmm3            \n"
+    "movdqa    (%0,%3,1),%%xmm4                \n"
+    "movdqa    0x10(%0,%3,1),%%xmm5            \n"
+    "lea       (%0,%5,4),%4                    \n"
+    "lea       0x20(%0),%0                     \n"
+    "pavgb     %%xmm4,%%xmm2                   \n"
+    "pavgb     %%xmm5,%%xmm3                   \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    0x0(%4),%%xmm2                  \n"
+    "movdqa    0x10(%4),%%xmm3                 \n"
+    "movdqa    0x0(%4,%5,1),%%xmm4             \n"
+    "movdqa    0x10(%4,%5,1),%%xmm5            \n"
+    "pavgb     %%xmm4,%%xmm2                   \n"
+    "pavgb     %%xmm5,%%xmm3                   \n"
+    "movdqa    0x0(%4,%5,2),%%xmm4             \n"
+    "movdqa    0x10(%4,%5,2),%%xmm5            \n"
+    "movdqa    0x0(%4,%3,1),%%xmm6             \n"
+    "pavgb     %%xmm6,%%xmm4                   \n"
+    "movdqa    0x10(%4,%3,1),%%xmm6            \n"
+    "pavgb     %%xmm6,%%xmm5                   \n"
+    "pavgb     %%xmm4,%%xmm2                   \n"
+    "pavgb     %%xmm5,%%xmm3                   \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "psadbw    %%xmm7,%%xmm0                   \n"
+    "psadbw    %%xmm7,%%xmm1                   \n"
+    "pshufd    $0xd8,%%xmm0,%%xmm0             \n"
+    "pshufd    $0x8d,%%xmm1,%%xmm1             \n"
+    "por       %%xmm1,%%xmm0                   \n"
+    "psrlw     $0x3,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movd      %%xmm0,(%1)                     \n"
+    "lea       0x4(%1),%1                      \n"
+    "sub       $0x4,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+rm"(dst_width), // %2
+    "+r"(stridex3),   // %3
+    "+r"(row4)        // %4
+  : "r"(static_cast<intptr_t>(src_stride))   // %5
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
 #define HAS_SCALEADDROWS_SSE2
 static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
                               uint16* dst_ptr, int src_width, int src_height) {
@@ -2062,67 +2094,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
 }
 #endif
 #if !defined(YUV_DISABLE_ASM) && defined(__i386__)
-extern "C" void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
-                                      uint8* dst_ptr, int dst_width);
-asm(
-  DECLARE_FUNCTION(ScaleRowDown8Int_SSE2)
-    "pusha                                     \n"
-    "mov       0x24(%esp),%esi                 \n"
-    "mov       0x28(%esp),%ebx                 \n"
-    "mov       0x2c(%esp),%edi                 \n"
-    "mov       0x30(%esp),%ecx                 \n"
-    "lea       (%ebx,%ebx,2),%edx              \n"
-    "pxor      %xmm7,%xmm7                     \n"
-
-"1:"
-    "movdqa    (%esi),%xmm0                    \n"
-    "movdqa    0x10(%esi),%xmm1                \n"
-    "movdqa    (%esi,%ebx,1),%xmm2             \n"
-    "movdqa    0x10(%esi,%ebx,1),%xmm3         \n"
-    "pavgb     %xmm2,%xmm0                     \n"
-    "pavgb     %xmm3,%xmm1                     \n"
-    "movdqa    (%esi,%ebx,2),%xmm2             \n"
-    "movdqa    0x10(%esi,%ebx,2),%xmm3         \n"
-    "movdqa    (%esi,%edx,1),%xmm4             \n"
-    "movdqa    0x10(%esi,%edx,1),%xmm5         \n"
-    "lea       (%esi,%ebx,4),%ebp              \n"
-    "lea       0x20(%esi),%esi                 \n"
-    "pavgb     %xmm4,%xmm2                     \n"
-    "pavgb     %xmm5,%xmm3                     \n"
-    "pavgb     %xmm2,%xmm0                     \n"
-    "pavgb     %xmm3,%xmm1                     \n"
-    "movdqa    0x0(%ebp),%xmm2                 \n"
-    "movdqa    0x10(%ebp),%xmm3                \n"
-    "movdqa    0x0(%ebp,%ebx,1),%xmm4          \n"
-    "movdqa    0x10(%ebp,%ebx,1),%xmm5         \n"
-    "pavgb     %xmm4,%xmm2                     \n"
-    "pavgb     %xmm5,%xmm3                     \n"
-    "movdqa    0x0(%ebp,%ebx,2),%xmm4          \n"
-    "movdqa    0x10(%ebp,%ebx,2),%xmm5         \n"
-    "movdqa    0x0(%ebp,%edx,1),%xmm6          \n"
-    "pavgb     %xmm6,%xmm4                     \n"
-    "movdqa    0x10(%ebp,%edx,1),%xmm6         \n"
-    "pavgb     %xmm6,%xmm5                     \n"
-    "pavgb     %xmm4,%xmm2                     \n"
-    "pavgb     %xmm5,%xmm3                     \n"
-    "pavgb     %xmm2,%xmm0                     \n"
-    "pavgb     %xmm3,%xmm1                     \n"
-    "psadbw    %xmm7,%xmm0                     \n"
-    "psadbw    %xmm7,%xmm1                     \n"
-    "pshufd    $0xd8,%xmm0,%xmm0               \n"
-    "pshufd    $0x8d,%xmm1,%xmm1               \n"
-    "por       %xmm1,%xmm0                     \n"
-    "psrlw     $0x3,%xmm0                      \n"
-    "packuswb  %xmm0,%xmm0                     \n"
-    "packuswb  %xmm0,%xmm0                     \n"
-    "sub       $0x4,%ecx                       \n"
-    "movd      %xmm0,(%edi)                    \n"
-    "lea       0x4(%edi),%edi                  \n"
-    "jg        1b                              \n"
-    "popa                                      \n"
-    "ret                                       \n"
-);
-
 // fpic is used for magiccam plugin
 #if !defined(__PIC__)
 #define HAS_SCALEROWDOWN34_SSSE3
@@ -2393,65 +2364,6 @@ extern "C" void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
 #endif // __PIC__
 
 #elif !defined(YUV_DISABLE_ASM) && defined(__x86_64__)
-static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
-                                  uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "lea       (%3,%3,2),%%r10                 \n"
-    "pxor      %%xmm7,%%xmm7                   \n"
-    ".p2align  4                               \n"
-  "1:"
-    "movdqa    (%0),%%xmm0                     \n"
-    "movdqa    0x10(%0),%%xmm1                 \n"
-    "movdqa    (%0,%3,1),%%xmm2                \n"
-    "movdqa    0x10(%0,%3,1),%%xmm3            \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    (%0,%3,2),%%xmm2                \n"
-    "movdqa    0x10(%0,%3,2),%%xmm3            \n"
-    "movdqa    (%0,%%r10,1),%%xmm4             \n"
-    "movdqa    0x10(%0,%%r10,1),%%xmm5         \n"
-    "lea       (%0,%3,4),%%r11                 \n"
-    "lea       0x20(%0),%0                     \n"
-    "pavgb     %%xmm4,%%xmm2                   \n"
-    "pavgb     %%xmm5,%%xmm3                   \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    0x0(%%r11),%%xmm2               \n"
-    "movdqa    0x10(%%r11),%%xmm3              \n"
-    "movdqa    0x0(%%r11,%3,1),%%xmm4          \n"
-    "movdqa    0x10(%%r11,%3,1),%%xmm5         \n"
-    "pavgb     %%xmm4,%%xmm2                   \n"
-    "pavgb     %%xmm5,%%xmm3                   \n"
-    "movdqa    0x0(%%r11,%3,2),%%xmm4          \n"
-    "movdqa    0x10(%%r11,%3,2),%%xmm5         \n"
-    "movdqa    0x0(%%r11,%%r10,1),%%xmm6       \n"
-    "pavgb     %%xmm6,%%xmm4                   \n"
-    "movdqa    0x10(%%r11,%%r10,1),%%xmm6      \n"
-    "pavgb     %%xmm6,%%xmm5                   \n"
-    "pavgb     %%xmm4,%%xmm2                   \n"
-    "pavgb     %%xmm5,%%xmm3                   \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "psadbw    %%xmm7,%%xmm0                   \n"
-    "psadbw    %%xmm7,%%xmm1                   \n"
-    "pshufd    $0xd8,%%xmm0,%%xmm0             \n"
-    "pshufd    $0x8d,%%xmm1,%%xmm1             \n"
-    "por       %%xmm1,%%xmm0                   \n"
-    "psrlw     $0x3,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movd      %%xmm0,(%1)                     \n"
-    "lea       0x4(%1),%1                      \n"
-    "sub       $0x4,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  : "r"(static_cast<intptr_t>(src_stride))   // %3
-  : "memory", "cc", "r10", "r11", "xmm6", "xmm7"
-  );
-}
-
 #define HAS_SCALEROWDOWN34_SSSE3
 static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
                                  uint8* dst_ptr, int dst_width) {
@@ -3636,9 +3548,8 @@ static void ScalePlaneDown(int src_width, int src_height,
 }
 
 // Scale a plane.
-//
-// This function in turn calls a scaling function
-// suitable for handling the desired resolutions.
+// This function in turn calls a scaling function suitable for handling
+// the desired resolutions.
 
 void ScalePlane(const uint8* src, int src_stride,
                 int src_width, int src_height,
@@ -3701,7 +3612,6 @@ void ScalePlane(const uint8* src, int src_stride,
 }
 
 // Scale an I420 image.
-//
 // This function in turn calls a scaling function for each plane.
 
 int I420Scale(const uint8* src_y, int src_stride_y,