mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Remove MEMLEA x64 NaCL macros
Bug: libyuv:702
Test: try bots pass
Change-Id: I0ee094551734368f2179c298e7bf423ec80a929c
Reviewed-on: https://chromium-review.googlesource.com/857845
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
a2142148e9
commit
3694891922
@ -578,10 +578,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
|
||||
// NaCL macros for GCC x64 - deprecated.
|
||||
#define MEMACCESS(base) "(%" #base ")"
|
||||
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
|
||||
#define MEMLEA(offset, base) #offset "(%" #base ")"
|
||||
#define MEMLEA3(offset, index, scale) #offset "(,%" #index "," #scale ")"
|
||||
#define MEMLEA4(offset, base, index, scale) \
|
||||
#offset "(%" #base ",%" #index "," #scale ")"
|
||||
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
|
||||
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
|
||||
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
|
||||
|
||||
@ -235,9 +235,9 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x10, 0) ",%0 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm2 \n"
|
||||
"lea " MEMLEA(0x10, 1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"psubusb %%xmm2,%%xmm1 \n"
|
||||
"psubusb %%xmm3,%%xmm2 \n"
|
||||
@ -302,7 +302,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x10, 0) ",%0 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"pmulld %%xmm6,%%xmm0 \n"
|
||||
"movdqa %5,%%xmm5 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -103,12 +103,12 @@ void ScaleRowDown2_SSSE3(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -133,14 +133,14 @@ void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||
"pavgw %%xmm5,%%xmm0 \n"
|
||||
"pavgw %%xmm5,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -166,7 +166,7 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
|
||||
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm2 \n"
|
||||
@ -179,7 +179,7 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
|
||||
"pavgw %%xmm5,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -202,13 +202,13 @@ void ScaleRowDown2_AVX2(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -234,7 +234,7 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
"vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n"
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
|
||||
@ -242,7 +242,7 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -269,7 +269,7 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
|
||||
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
|
||||
MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
|
||||
MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
@ -283,7 +283,7 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -311,14 +311,14 @@ void ScaleRowDown4_SSSE3(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pand %%xmm5,%%xmm0 \n"
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movq %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x8,1) ",%1 \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -339,7 +339,7 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"packuswb %%xmm4,%%xmm4 \n"
|
||||
"psllw $0x3,%%xmm5 \n"
|
||||
"lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
|
||||
"lea 0x00(%4,%4,2),%3 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -361,7 +361,7 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
|
||||
"paddw %%xmm3,%%xmm1 \n"
|
||||
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
|
||||
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm3 \n"
|
||||
"paddw %%xmm2,%%xmm0 \n"
|
||||
@ -371,7 +371,7 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
|
||||
"psrlw $0x4,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movq %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x8,1) ",%1 \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -398,7 +398,7 @@ void ScaleRowDown4_AVX2(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"vpand %%ymm5,%%ymm0,%%ymm0 \n"
|
||||
"vpand %%ymm5,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
@ -407,7 +407,7 @@ void ScaleRowDown4_AVX2(const uint8* src_ptr,
|
||||
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -448,7 +448,7 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
|
||||
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||
MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
|
||||
MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
@ -460,7 +460,7 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
|
||||
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -494,7 +494,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm2 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm0,%%xmm1 \n"
|
||||
"pshufb %%xmm3,%%xmm0 \n"
|
||||
@ -503,7 +503,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr,
|
||||
"movq %%xmm0," MEMACCESS(1) " \n"
|
||||
"movq %%xmm1," MEMACCESS2(0x8,1) " \n"
|
||||
"movq %%xmm2," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x18,1) ",%1 \n"
|
||||
"lea 0x18(%1),%1 \n"
|
||||
"sub $0x18,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -558,7 +558,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
"movq %%xmm6," MEMACCESS2(0x8,1) " \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
|
||||
MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pavgb %%xmm7,%%xmm6 \n"
|
||||
"pshufb %%xmm4,%%xmm6 \n"
|
||||
"pmaddubsw %4,%%xmm6 \n"
|
||||
@ -566,7 +566,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
"psrlw $0x2,%%xmm6 \n"
|
||||
"packuswb %%xmm6,%%xmm6 \n"
|
||||
"movq %%xmm6," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x18,1) ",%1 \n"
|
||||
"lea 0x18(%1),%1 \n"
|
||||
"sub $0x18,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -627,7 +627,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
"movq %%xmm6," MEMACCESS2(0x8,1) " \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
|
||||
MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pavgb %%xmm6,%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm6 \n"
|
||||
"pshufb %%xmm4,%%xmm6 \n"
|
||||
@ -636,7 +636,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
"psrlw $0x2,%%xmm6 \n"
|
||||
"packuswb %%xmm6,%%xmm6 \n"
|
||||
"movq %%xmm6," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x18,1) ",%1 \n"
|
||||
"lea 0x18(%1),%1 \n"
|
||||
"sub $0x18,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -662,14 +662,14 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pshufb %%xmm4,%%xmm0 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"paddusb %%xmm1,%%xmm0 \n"
|
||||
"movq %%xmm0," MEMACCESS(1) " \n"
|
||||
"movhlps %%xmm0,%%xmm1 \n"
|
||||
"movd %%xmm1," MEMACCESS2(0x8,1) " \n"
|
||||
"lea " MEMLEA(0xc,1) ",%1 \n"
|
||||
"lea 0xc(%1),%1 \n"
|
||||
"sub $0xc,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -701,7 +701,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"pshufb %%xmm2,%%xmm1 \n"
|
||||
@ -715,7 +715,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
"movd %%xmm1," MEMACCESS(1) " \n"
|
||||
"psrlq $0x10,%%xmm1 \n"
|
||||
"movd %%xmm1," MEMACCESS2(0x2,1) " \n"
|
||||
"lea " MEMLEA(0x6,1) ",%1 \n"
|
||||
"lea 0x6(%1),%1 \n"
|
||||
"sub $0x6,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -755,7 +755,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
"paddusw %%xmm6,%%xmm0 \n"
|
||||
"paddusw %%xmm7,%%xmm1 \n"
|
||||
MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movhlps %%xmm6,%%xmm7 \n"
|
||||
"punpcklbw %%xmm5,%%xmm6 \n"
|
||||
"punpcklbw %%xmm5,%%xmm7 \n"
|
||||
@ -779,7 +779,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
"movd %%xmm6," MEMACCESS(1) " \n"
|
||||
"psrlq $0x10,%%xmm6 \n"
|
||||
"movd %%xmm6," MEMACCESS2(0x2,1) " \n"
|
||||
"lea " MEMLEA(0x6,1) ",%1 \n"
|
||||
"lea 0x6(%1),%1 \n"
|
||||
"sub $0x6,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -799,7 +799,7 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm3 \n"
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
|
||||
"lea 0x10(%0),%0 \n" // src_ptr += 16
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n"
|
||||
"movdqa %%xmm3,%%xmm2 \n"
|
||||
@ -809,7 +809,7 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
"paddusw %%xmm3,%%xmm1 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -829,7 +829,7 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32
|
||||
"lea 0x20(%0),%0 \n" // src_ptr += 32
|
||||
"vpermq $0xd8,%%ymm3,%%ymm3 \n"
|
||||
"vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
|
||||
"vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
|
||||
@ -837,7 +837,7 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
"vpaddusw " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n"
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
|
||||
"lea " MEMLEA(0x40,1) ",%1 \n"
|
||||
"lea 0x40(%1),%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -908,7 +908,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr,
|
||||
"packuswb %%xmm1,%%xmm1 \n"
|
||||
"movd %%xmm1,%k2 \n"
|
||||
"mov %w2," MEMACCESS(0) " \n"
|
||||
"lea " MEMLEA(0x2,0) ",%0 \n"
|
||||
"lea 0x2(%0),%0 \n"
|
||||
"subl $0x2,%5 \n"
|
||||
"jge 2b \n"
|
||||
|
||||
@ -967,13 +967,13 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm1 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(0) " \n"
|
||||
"movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
@ -994,10 +994,10 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"shufps $0xdd,%%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -1017,13 +1017,13 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"shufps $0x88,%%xmm1,%%xmm0 \n"
|
||||
"shufps $0xdd,%%xmm1,%%xmm2 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -1044,7 +1044,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
|
||||
MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
@ -1052,7 +1052,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
"shufps $0xdd,%%xmm1,%%xmm2 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -1075,8 +1075,8 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
|
||||
intptr_t src_stepx_x12;
|
||||
(void)src_stride;
|
||||
asm volatile (
|
||||
"lea " MEMLEA3(0x00,1,4) ",%1 \n"
|
||||
"lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
|
||||
"lea 0x00(,%1,4),%1 \n"
|
||||
"lea 0x00(%1,%1,2),%4 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movd " MEMACCESS(0) ",%%xmm0 \n"
|
||||
@ -1084,11 +1084,11 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
|
||||
"punpckldq %%xmm1,%%xmm0 \n"
|
||||
MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
|
||||
MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
|
||||
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
|
||||
"lea 0x00(%0,%1,4),%0 \n"
|
||||
"punpckldq %%xmm3,%%xmm2 \n"
|
||||
"punpcklqdq %%xmm2,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(2) " \n"
|
||||
"lea " MEMLEA(0x10,2) ",%2 \n"
|
||||
"lea 0x10(%2),%2 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -1112,9 +1112,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
intptr_t src_stepx_x12;
|
||||
intptr_t row1 = (intptr_t)(src_stride);
|
||||
asm volatile (
|
||||
"lea " MEMLEA3(0x00,1,4) ",%1 \n"
|
||||
"lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
|
||||
"lea " MEMLEA4(0x00,0,5,1) ",%5 \n"
|
||||
"lea 0x00(,%1,4),%1 \n"
|
||||
"lea 0x00(%1,%1,2),%4 \n"
|
||||
"lea 0x00(%0,%5,1),%5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -1122,12 +1122,12 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
|
||||
MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
|
||||
MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
|
||||
"lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
|
||||
"lea 0x00(%0,%1,4),%0 \n"
|
||||
"movq " MEMACCESS(5) ",%%xmm2 \n"
|
||||
MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
|
||||
MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
|
||||
MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
|
||||
"lea " MEMLEA4(0x00,5,1,4) ",%5 \n"
|
||||
"lea 0x00(%5,%1,4),%5 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"pavgb %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
@ -1135,7 +1135,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
"shufps $0xdd,%%xmm1,%%xmm2 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(2) " \n"
|
||||
"lea " MEMLEA(0x10,2) ",%2 \n"
|
||||
"lea 0x10(%2),%2 \n"
|
||||
"sub $0x4,%3 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -1188,7 +1188,7 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
|
||||
"punpckldq %%xmm4,%%xmm1 \n"
|
||||
"punpcklqdq %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(2) " \n"
|
||||
"lea " MEMLEA(0x10,2) ",%2 \n"
|
||||
"lea 0x10(%2),%2 \n"
|
||||
"sub $0x4,%4 \n"
|
||||
"jge 40b \n"
|
||||
|
||||
@ -1200,7 +1200,7 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
|
||||
"pextrw $0x5,%%xmm2,%k0 \n"
|
||||
"punpckldq %%xmm1,%%xmm0 \n"
|
||||
"movq %%xmm0," MEMACCESS(2) " \n"
|
||||
"lea " MEMLEA(0x8,2) ",%2 \n"
|
||||
"lea 0x8(%2),%2 \n"
|
||||
"29: \n"
|
||||
"test $0x1,%4 \n"
|
||||
"je 99f \n"
|
||||
@ -1232,13 +1232,13 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpckldq %%xmm0,%%xmm0 \n"
|
||||
"punpckhdq %%xmm1,%%xmm1 \n"
|
||||
"movdqu %%xmm0," MEMACCESS(0) " \n"
|
||||
"movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
@ -1307,7 +1307,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
|
||||
"pextrw $0x3,%%xmm2,%k4 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movq %%xmm0," MEMACCESS(0) " \n"
|
||||
"lea " MEMLEA(0x8,0) ",%0 \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"sub $0x2,%2 \n"
|
||||
"jge 2b \n"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user