mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
cleanup ScaleRowDown8Int_SSE2 and other simple gcc versions
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/581004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@265 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
68caaed871
commit
f368565b95
264
source/scale.cc
264
source/scale.cc
@ -1582,54 +1582,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
// Note that movdqa+palign may be better than movdqu.
|
||||
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width) {
|
||||
__asm {
|
||||
mov edx, [esp + 4] // dst_ptr
|
||||
mov eax, [esp + 8] // src_ptr
|
||||
mov ecx, [esp + 12] // dst_width
|
||||
movdqa xmm1, _round34
|
||||
movdqa xmm2, _shuf01
|
||||
movdqa xmm3, _shuf11
|
||||
movdqa xmm4, _shuf21
|
||||
movdqa xmm5, _madd01
|
||||
movdqa xmm6, _madd11
|
||||
movdqa xmm7, _madd21
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
movdqa xmm0, [eax] // pixels 0..7
|
||||
pshufb xmm0, xmm2
|
||||
pmaddubsw xmm0, xmm5
|
||||
paddsw xmm0, xmm1
|
||||
psrlw xmm0, 2
|
||||
packuswb xmm0, xmm0
|
||||
movq qword ptr [edx], xmm0
|
||||
movdqu xmm0, [eax+8] // pixels 8..15
|
||||
pshufb xmm0, xmm3
|
||||
pmaddubsw xmm0, xmm6
|
||||
paddsw xmm0, xmm1
|
||||
psrlw xmm0, 2
|
||||
packuswb xmm0, xmm0
|
||||
movq qword ptr [edx+8], xmm0
|
||||
movdqa xmm0, [eax+16] // pixels 16..23
|
||||
lea eax, [eax+32]
|
||||
pshufb xmm0, xmm4
|
||||
pmaddubsw xmm0, xmm7
|
||||
paddsw xmm0, xmm1
|
||||
psrlw xmm0, 2
|
||||
packuswb xmm0, xmm0
|
||||
sub ecx, 24
|
||||
movq qword ptr [edx+16], xmm0
|
||||
lea edx, [edx+24]
|
||||
jg wloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
|
||||
|
||||
// GCC versions of row functions are verbatim conversions from Visual C.
|
||||
@ -1658,6 +1610,9 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, int src_stride,
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
@ -1693,9 +1648,11 @@ void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||
"+r"(dst_width) // %2
|
||||
: "r"(static_cast<intptr_t>(src_stride)) // %3
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
@ -1718,6 +1675,9 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, int src_stride,
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
@ -1754,6 +1714,9 @@ static void ScaleRowDown2Int_Unaligned_SSE2(const uint8* src_ptr,
|
||||
"+r"(dst_width) // %2
|
||||
: "r"(static_cast<intptr_t>(src_stride)) // %3
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
@ -1781,12 +1744,15 @@ static void ScaleRowDown4_SSE2(const uint8* src_ptr, int src_stride,
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
intptr_t temp = 0;
|
||||
intptr_t stridex3 = 0;
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm7,%%xmm7 \n"
|
||||
"psrlw $0x8,%%xmm7 \n"
|
||||
@ -1829,11 +1795,11 @@ static void ScaleRowDown4Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width), // %2
|
||||
"+r"(temp) // %3
|
||||
"+r"(stridex3) // %3
|
||||
: "r"(static_cast<intptr_t>(src_stride)) // %4
|
||||
: "memory", "cc"
|
||||
#if defined(__x86_64__)
|
||||
, "xmm6", "xmm7"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
@ -1869,6 +1835,72 @@ static void ScaleRowDown8_SSE2(const uint8* src_ptr, int src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
intptr_t stridex3 = 0;
|
||||
intptr_t row4 = 0;
|
||||
asm volatile (
|
||||
"lea (%5,%5,2),%3 \n"
|
||||
"pxor %%xmm7,%%xmm7 \n"
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"movdqa (%0,%5,1),%%xmm2 \n"
|
||||
"movdqa 0x10(%0,%5,1),%%xmm3 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"movdqa (%0,%5,2),%%xmm2 \n"
|
||||
"movdqa 0x10(%0,%5,2),%%xmm3 \n"
|
||||
"movdqa (%0,%3,1),%%xmm4 \n"
|
||||
"movdqa 0x10(%0,%3,1),%%xmm5 \n"
|
||||
"lea (%0,%5,4),%4 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pavgb %%xmm4,%%xmm2 \n"
|
||||
"pavgb %%xmm5,%%xmm3 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"movdqa 0x0(%4),%%xmm2 \n"
|
||||
"movdqa 0x10(%4),%%xmm3 \n"
|
||||
"movdqa 0x0(%4,%5,1),%%xmm4 \n"
|
||||
"movdqa 0x10(%4,%5,1),%%xmm5 \n"
|
||||
"pavgb %%xmm4,%%xmm2 \n"
|
||||
"pavgb %%xmm5,%%xmm3 \n"
|
||||
"movdqa 0x0(%4,%5,2),%%xmm4 \n"
|
||||
"movdqa 0x10(%4,%5,2),%%xmm5 \n"
|
||||
"movdqa 0x0(%4,%3,1),%%xmm6 \n"
|
||||
"pavgb %%xmm6,%%xmm4 \n"
|
||||
"movdqa 0x10(%4,%3,1),%%xmm6 \n"
|
||||
"pavgb %%xmm6,%%xmm5 \n"
|
||||
"pavgb %%xmm4,%%xmm2 \n"
|
||||
"pavgb %%xmm5,%%xmm3 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"psadbw %%xmm7,%%xmm0 \n"
|
||||
"psadbw %%xmm7,%%xmm1 \n"
|
||||
"pshufd $0xd8,%%xmm0,%%xmm0 \n"
|
||||
"pshufd $0x8d,%%xmm1,%%xmm1 \n"
|
||||
"por %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x3,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,(%1) \n"
|
||||
"lea 0x4(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+rm"(dst_width), // %2
|
||||
"+r"(stridex3), // %3
|
||||
"+r"(row4) // %4
|
||||
: "r"(static_cast<intptr_t>(src_stride)) // %5
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
#define HAS_SCALEADDROWS_SSE2
|
||||
static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
@ -2062,67 +2094,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
|
||||
}
|
||||
#endif
|
||||
#if !defined(YUV_DISABLE_ASM) && defined(__i386__)
|
||||
extern "C" void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
asm(
|
||||
DECLARE_FUNCTION(ScaleRowDown8Int_SSE2)
|
||||
"pusha \n"
|
||||
"mov 0x24(%esp),%esi \n"
|
||||
"mov 0x28(%esp),%ebx \n"
|
||||
"mov 0x2c(%esp),%edi \n"
|
||||
"mov 0x30(%esp),%ecx \n"
|
||||
"lea (%ebx,%ebx,2),%edx \n"
|
||||
"pxor %xmm7,%xmm7 \n"
|
||||
|
||||
"1:"
|
||||
"movdqa (%esi),%xmm0 \n"
|
||||
"movdqa 0x10(%esi),%xmm1 \n"
|
||||
"movdqa (%esi,%ebx,1),%xmm2 \n"
|
||||
"movdqa 0x10(%esi,%ebx,1),%xmm3 \n"
|
||||
"pavgb %xmm2,%xmm0 \n"
|
||||
"pavgb %xmm3,%xmm1 \n"
|
||||
"movdqa (%esi,%ebx,2),%xmm2 \n"
|
||||
"movdqa 0x10(%esi,%ebx,2),%xmm3 \n"
|
||||
"movdqa (%esi,%edx,1),%xmm4 \n"
|
||||
"movdqa 0x10(%esi,%edx,1),%xmm5 \n"
|
||||
"lea (%esi,%ebx,4),%ebp \n"
|
||||
"lea 0x20(%esi),%esi \n"
|
||||
"pavgb %xmm4,%xmm2 \n"
|
||||
"pavgb %xmm5,%xmm3 \n"
|
||||
"pavgb %xmm2,%xmm0 \n"
|
||||
"pavgb %xmm3,%xmm1 \n"
|
||||
"movdqa 0x0(%ebp),%xmm2 \n"
|
||||
"movdqa 0x10(%ebp),%xmm3 \n"
|
||||
"movdqa 0x0(%ebp,%ebx,1),%xmm4 \n"
|
||||
"movdqa 0x10(%ebp,%ebx,1),%xmm5 \n"
|
||||
"pavgb %xmm4,%xmm2 \n"
|
||||
"pavgb %xmm5,%xmm3 \n"
|
||||
"movdqa 0x0(%ebp,%ebx,2),%xmm4 \n"
|
||||
"movdqa 0x10(%ebp,%ebx,2),%xmm5 \n"
|
||||
"movdqa 0x0(%ebp,%edx,1),%xmm6 \n"
|
||||
"pavgb %xmm6,%xmm4 \n"
|
||||
"movdqa 0x10(%ebp,%edx,1),%xmm6 \n"
|
||||
"pavgb %xmm6,%xmm5 \n"
|
||||
"pavgb %xmm4,%xmm2 \n"
|
||||
"pavgb %xmm5,%xmm3 \n"
|
||||
"pavgb %xmm2,%xmm0 \n"
|
||||
"pavgb %xmm3,%xmm1 \n"
|
||||
"psadbw %xmm7,%xmm0 \n"
|
||||
"psadbw %xmm7,%xmm1 \n"
|
||||
"pshufd $0xd8,%xmm0,%xmm0 \n"
|
||||
"pshufd $0x8d,%xmm1,%xmm1 \n"
|
||||
"por %xmm1,%xmm0 \n"
|
||||
"psrlw $0x3,%xmm0 \n"
|
||||
"packuswb %xmm0,%xmm0 \n"
|
||||
"packuswb %xmm0,%xmm0 \n"
|
||||
"sub $0x4,%ecx \n"
|
||||
"movd %xmm0,(%edi) \n"
|
||||
"lea 0x4(%edi),%edi \n"
|
||||
"jg 1b \n"
|
||||
"popa \n"
|
||||
"ret \n"
|
||||
);
|
||||
|
||||
// fpic is used for magiccam plugin
|
||||
#if !defined(__PIC__)
|
||||
#define HAS_SCALEROWDOWN34_SSSE3
|
||||
@ -2393,65 +2364,6 @@ extern "C" void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride,
|
||||
#endif // __PIC__
|
||||
|
||||
#elif !defined(YUV_DISABLE_ASM) && defined(__x86_64__)
|
||||
static void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"lea (%3,%3,2),%%r10 \n"
|
||||
"pxor %%xmm7,%%xmm7 \n"
|
||||
".p2align 4 \n"
|
||||
"1:"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"movdqa (%0,%3,1),%%xmm2 \n"
|
||||
"movdqa 0x10(%0,%3,1),%%xmm3 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"movdqa (%0,%3,2),%%xmm2 \n"
|
||||
"movdqa 0x10(%0,%3,2),%%xmm3 \n"
|
||||
"movdqa (%0,%%r10,1),%%xmm4 \n"
|
||||
"movdqa 0x10(%0,%%r10,1),%%xmm5 \n"
|
||||
"lea (%0,%3,4),%%r11 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pavgb %%xmm4,%%xmm2 \n"
|
||||
"pavgb %%xmm5,%%xmm3 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"movdqa 0x0(%%r11),%%xmm2 \n"
|
||||
"movdqa 0x10(%%r11),%%xmm3 \n"
|
||||
"movdqa 0x0(%%r11,%3,1),%%xmm4 \n"
|
||||
"movdqa 0x10(%%r11,%3,1),%%xmm5 \n"
|
||||
"pavgb %%xmm4,%%xmm2 \n"
|
||||
"pavgb %%xmm5,%%xmm3 \n"
|
||||
"movdqa 0x0(%%r11,%3,2),%%xmm4 \n"
|
||||
"movdqa 0x10(%%r11,%3,2),%%xmm5 \n"
|
||||
"movdqa 0x0(%%r11,%%r10,1),%%xmm6 \n"
|
||||
"pavgb %%xmm6,%%xmm4 \n"
|
||||
"movdqa 0x10(%%r11,%%r10,1),%%xmm6 \n"
|
||||
"pavgb %%xmm6,%%xmm5 \n"
|
||||
"pavgb %%xmm4,%%xmm2 \n"
|
||||
"pavgb %%xmm5,%%xmm3 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"psadbw %%xmm7,%%xmm0 \n"
|
||||
"psadbw %%xmm7,%%xmm1 \n"
|
||||
"pshufd $0xd8,%%xmm0,%%xmm0 \n"
|
||||
"pshufd $0x8d,%%xmm1,%%xmm1 \n"
|
||||
"por %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x3,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,(%1) \n"
|
||||
"lea 0x4(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width) // %2
|
||||
: "r"(static_cast<intptr_t>(src_stride)) // %3
|
||||
: "memory", "cc", "r10", "r11", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
|
||||
#define HAS_SCALEROWDOWN34_SSSE3
|
||||
static void ScaleRowDown34_SSSE3(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
@ -3636,9 +3548,8 @@ static void ScalePlaneDown(int src_width, int src_height,
|
||||
}
|
||||
|
||||
// Scale a plane.
|
||||
//
|
||||
// This function in turn calls a scaling function
|
||||
// suitable for handling the desired resolutions.
|
||||
// This function in turn calls a scaling function suitable for handling
|
||||
// the desired resolutions.
|
||||
|
||||
void ScalePlane(const uint8* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
@ -3701,7 +3612,6 @@ void ScalePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
// Scale an I420 image.
|
||||
//
|
||||
// This function in turn calls a scaling function for each plane.
|
||||
|
||||
int I420Scale(const uint8* src_y, int src_stride_y,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user