Remove MEMOPREG x64 NaCL macros

The MEMOPREG macros are deprecated in row.h.

Regular expressions used to remove the MEMOPREG macros (each group below is
an example invocation, the search pattern, and the replacement):

MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
MEMOPREG\((.*), (.*), (.*), (.*), (.*), (.*)\)
"\1    \2(%\3,%\4,\5),%%\6            \\n"

MEMOPREG(movdqu,0x00,1,4,1,xmm2)
MEMOPREG\((.*),(.*),(.*),(.*),(.*),(.*)\)
"\1    \2(%\3,%\4,\5),%%\6            \\n"

MEMOPREG(movdqu,0x00,1,4,1,xmm2)
MEMOPREG\((.*),(.*),(.*),(.*),(.*),(.*)\)(.*)(//.*)
"\1    \2(%\3,%\4,\5),%%\6           \\n"

TBR=braveyao@chromium.org

Bug: libyuv:702
Test: try bots pass
Change-Id: If8743abd9af2e8c549d0c7d3d49733a9b0f0ca86
Reviewed-on: https://chromium-review.googlesource.com/865964
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Author: Frank Barchard
Date:   2018-01-12 17:44:21 -08:00 (committed by Commit Bot)
Commit: ecab5430c2
Parent: b33e0f97e7
5 changed files with 134 additions and 139 deletions

--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1688
+Version: 1689
 License: BSD
 License File: LICENSE

--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -169,7 +169,6 @@ extern "C" {
 // The following are available on all x86 platforms, but
 // require VS2012, clang 3.4 or gcc 4.7.
-// The code supports NaCL but requires a new compiler and validator.
 #if !defined(LIBYUV_DISABLE_X86) && \
 (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
 defined(GCC_HAS_AVX2))
@@ -575,10 +574,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
 #define LABELALIGN
 #endif
-// NaCL macros for GCC x64 - deprecated.
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
-  #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
 // Intel Code Analizer markers. Insert IACA_START IACA_END around code to be
 // measured and then run with iaca -64 libyuv_unittest.
 // IACA_ASM_START amd IACA_ASM_END are equivalents that can be used within
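
Since the deleted macro body was nothing but preprocessor stringification
glued together by string-literal concatenation, every call site expands to
exactly the plain asm string that now replaces it. A minimal standalone
sketch (not part of the commit) demonstrating the expansion:

#include <cstdio>

// The macro as defined in row.h before this commit.
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
  #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"

int main() {
  // Expands to "movdqu 0x00(%0,%4,1),%%xmm7\n" -- the literal written in
  // its place in row_gcc.cc and scale_gcc.cc.
  std::fputs(MEMOPREG(movdqu, 0x00, 0, 4, 1, xmm7), stdout);
  // Named-operand form used by the READYUV* macros:
  // "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1\n"
  std::fputs(MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1), stdout);
  return 0;
}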

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1688
+#define LIBYUV_VERSION 1689
 #endif // INCLUDE_LIBYUV_VERSION_H_

--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -987,16 +987,16 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+"movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+"movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
-MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+"movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+"movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1187,16 +1187,16 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+"movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+"movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
-MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+"movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+"movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1349,16 +1349,16 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+"movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+"movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
-MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+"movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+"movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1484,16 +1484,16 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+"movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+"movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
-MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+"movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+"movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1549,16 +1549,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+"movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+"movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
-MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+"movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+"movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1605,7 +1605,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // Read 8 UV from 444
 #define READYUV444 \
 "movq (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "punpcklbw %%xmm1,%%xmm0 \n" \
 "movq (%[y_buf]),%%xmm4 \n" \
@@ -1615,7 +1615,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // Read 4 UV from 422, upsample to 8 UV
 #define READYUV422 \
 "movd (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x4(%[u_buf]),%[u_buf] \n" \
 "punpcklbw %%xmm1,%%xmm0 \n" \
 "punpcklwd %%xmm0,%%xmm0 \n" \
@@ -1629,7 +1629,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
 #define READYUV210 \
 "movq (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "punpcklwd %%xmm1,%%xmm0 \n" \
 "psraw $0x2,%%xmm0 \n" \
@@ -1642,7 +1642,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
 #define READYUVA422 \
 "movd (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x4(%[u_buf]),%[u_buf] \n" \
 "punpcklbw %%xmm1,%%xmm0 \n" \
 "punpcklwd %%xmm0,%%xmm0 \n" \
@@ -2093,7 +2093,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // Read 16 UV from 444
 #define READYUV444_AVX2 \
 "vmovdqu (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x10(%[u_buf]),%[u_buf] \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
@@ -2106,7 +2106,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // Read 8 UV from 422, upsample to 16 UV.
 #define READYUV422_AVX2 \
 "vmovq (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
@@ -2121,7 +2121,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1.
 #define READYUV210_AVX2 \
 "vmovdqu (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x10(%[u_buf]),%[u_buf] \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
@@ -2136,7 +2136,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
 #define READYUVA422_AVX2 \
 "vmovq (%[u_buf]),%%xmm0 \n" \
-MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+"vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
@@ -2683,7 +2683,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 LABELALIGN
 "1: \n"
-MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
+"movdqu -0x10(%0,%2,1),%%xmm0 \n"
 "pshufb %%xmm5,%%xmm0 \n"
 "movdqu %%xmm0,(%1) \n"
 "lea 0x10(%1),%1 \n"
@@ -2707,7 +2707,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
 LABELALIGN
 "1: \n"
-MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0
+"vmovdqu -0x20(%0,%2,1),%%ymm0 \n"
 "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
 "vpermq $0x4e,%%ymm0,%%ymm0 \n"
 "vmovdqu %%ymm0,(%1) \n"
@@ -2902,7 +2902,7 @@ void MergeUVRow_AVX2(const uint8* src_u,
 LABELALIGN
 "1: \n"
 "vmovdqu (%0),%%ymm0 \n"
-MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
+"vmovdqu 0x00(%0,%1,1),%%ymm1 \n"
 "lea 0x20(%0),%0 \n"
 "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
 "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
@@ -2936,7 +2936,7 @@ void MergeUVRow_SSE2(const uint8* src_u,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+"movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "movdqa %%xmm0,%%xmm2 \n"
 "punpcklbw %%xmm1,%%xmm0 \n"
@@ -3735,8 +3735,8 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
-MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+"movdqu 0x00(%0,%4,1),%%xmm2 \n"
+"movdqu 0x10(%0,%4,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -3837,8 +3837,8 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
-MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+"movdqu 0x00(%0,%4,1),%%xmm2 \n"
+"movdqu 0x10(%0,%4,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -4429,9 +4429,9 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
 "movdqu (%0),%%xmm0 \n"
 "movzb 0x03(%0),%3 \n"
 "punpcklbw %%xmm0,%%xmm0 \n"
-MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
+"movd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x07(%0),%3 \n"
-MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
+"movd 0x00(%4,%3,4),%%xmm3 \n"
 "pshuflw $0x40,%%xmm2,%%xmm2 \n"
 "pshuflw $0x40,%%xmm3,%%xmm3 \n"
 "movlhps %%xmm3,%%xmm2 \n"
@@ -4439,9 +4439,9 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
 "movdqu (%0),%%xmm1 \n"
 "movzb 0x0b(%0),%3 \n"
 "punpckhbw %%xmm1,%%xmm1 \n"
-MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
+"movd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x0f(%0),%3 \n"
-MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
+"movd 0x00(%4,%3,4),%%xmm3 \n"
 "pshuflw $0x40,%%xmm2,%%xmm2 \n"
 "pshuflw $0x40,%%xmm3,%%xmm3 \n"
 "movlhps %%xmm3,%%xmm2 \n"
@@ -4481,24 +4481,24 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
 "1: \n"
 // replace VPGATHER
 "movzb 0x03(%0),%3 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
+"vmovd 0x00(%4,%3,4),%%xmm0 \n"
 "movzb 0x07(%0),%3 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
+"vmovd 0x00(%4,%3,4),%%xmm1 \n"
 "movzb 0x0b(%0),%3 \n"
 "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
+"vmovd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x0f(%0),%3 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
+"vmovd 0x00(%4,%3,4),%%xmm3 \n"
 "movzb 0x13(%0),%3 \n"
 "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
+"vmovd 0x00(%4,%3,4),%%xmm0 \n"
 "movzb 0x17(%0),%3 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
+"vmovd 0x00(%4,%3,4),%%xmm1 \n"
 "movzb 0x1b(%0),%3 \n"
 "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
+"vmovd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x1f(%0),%3 \n"
-MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
+"vmovd 0x00(%4,%3,4),%%xmm3 \n"
 "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n"
 "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n"
 "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n"
@@ -5033,13 +5033,13 @@ void SobelXRow_SSE2(const uint8* src_y0,
 "punpcklbw %%xmm5,%%xmm0 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "psubw %%xmm1,%%xmm0 \n"
-MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
-MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
+"movq 0x00(%0,%1,1),%%xmm1 \n"
+"movq 0x02(%0,%1,1),%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "psubw %%xmm2,%%xmm1 \n"
-MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
-MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
+"movq 0x00(%0,%2,1),%%xmm2 \n"
+"movq 0x02(%0,%2,1),%%xmm3 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm3 \n"
 "psubw %%xmm3,%%xmm2 \n"
@@ -5084,17 +5084,17 @@ void SobelYRow_SSE2(const uint8* src_y0,
 LABELALIGN
 "1: \n"
 "movq (%0),%%xmm0 \n"
-MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
+"movq 0x00(%0,%1,1),%%xmm1 \n"
 "punpcklbw %%xmm5,%%xmm0 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "psubw %%xmm1,%%xmm0 \n"
 "movq 0x1(%0),%%xmm1 \n"
-MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
+"movq 0x01(%0,%1,1),%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "psubw %%xmm2,%%xmm1 \n"
 "movq 0x2(%0),%%xmm2 \n"
-MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
+"movq 0x02(%0,%1,1),%%xmm3 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm3 \n"
 "psubw %%xmm3,%%xmm2 \n"
@@ -5139,7 +5139,7 @@ void SobelRow_SSE2(const uint8* src_sobelx,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+"movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "paddusb %%xmm1,%%xmm0 \n"
 "movdqa %%xmm0,%%xmm2 \n"
@@ -5188,7 +5188,7 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+"movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "paddusb %%xmm1,%%xmm0 \n"
 "movdqu %%xmm0,(%2) \n"
@@ -5224,7 +5224,7 @@ void SobelXYRow_SSE2(const uint8* src_sobelx,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+"movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "movdqa %%xmm0,%%xmm2 \n"
 "paddusb %%xmm1,%%xmm2 \n"
@@ -5373,19 +5373,19 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
 "movdqu 0x10(%0),%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm3 \n"
-MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
-MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
-MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
-MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
+"psubd 0x00(%0,%4,4),%%xmm0 \n"
+"psubd 0x10(%0,%4,4),%%xmm1 \n"
+"psubd 0x20(%0,%4,4),%%xmm2 \n"
+"psubd 0x30(%0,%4,4),%%xmm3 \n"
 "lea 0x40(%0),%0 \n"
 "psubd (%1),%%xmm0 \n"
 "psubd 0x10(%1),%%xmm1 \n"
 "psubd 0x20(%1),%%xmm2 \n"
 "psubd 0x30(%1),%%xmm3 \n"
-MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
-MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
-MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
-MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
+"paddd 0x00(%1,%4,4),%%xmm0 \n"
+"paddd 0x10(%1,%4,4),%%xmm1 \n"
+"paddd 0x20(%1,%4,4),%%xmm2 \n"
+"paddd 0x30(%1,%4,4),%%xmm3 \n"
 "lea 0x40(%1),%1 \n"
 "packssdw %%xmm1,%%xmm0 \n"
 "packssdw %%xmm3,%%xmm2 \n"
@@ -5405,19 +5405,19 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
 "movdqu 0x10(%0),%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm3 \n"
-MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
-MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
-MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
-MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
+"psubd 0x00(%0,%4,4),%%xmm0 \n"
+"psubd 0x10(%0,%4,4),%%xmm1 \n"
+"psubd 0x20(%0,%4,4),%%xmm2 \n"
+"psubd 0x30(%0,%4,4),%%xmm3 \n"
 "lea 0x40(%0),%0 \n"
 "psubd (%1),%%xmm0 \n"
 "psubd 0x10(%1),%%xmm1 \n"
 "psubd 0x20(%1),%%xmm2 \n"
 "psubd 0x30(%1),%%xmm3 \n"
-MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
-MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
-MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
-MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
+"paddd 0x00(%1,%4,4),%%xmm0 \n"
+"paddd 0x10(%1,%4,4),%%xmm1 \n"
+"paddd 0x20(%1,%4,4),%%xmm2 \n"
+"paddd 0x30(%1,%4,4),%%xmm3 \n"
 "lea 0x40(%1),%1 \n"
 "cvtdq2ps %%xmm0,%%xmm0 \n"
 "cvtdq2ps %%xmm1,%%xmm1 \n"
@@ -5447,10 +5447,10 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
 LABELALIGN
 "10: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
+"psubd 0x00(%0,%4,4),%%xmm0 \n"
 "lea 0x10(%0),%0 \n"
 "psubd (%1),%%xmm0 \n"
-MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
+"paddd 0x00(%1,%4,4),%%xmm0 \n"
 "lea 0x10(%1),%1 \n"
 "cvtdq2ps %%xmm0,%%xmm0 \n"
 "mulps %%xmm4,%%xmm0 \n"
@@ -5515,16 +5515,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb,
 "pshufd $0x39,%%xmm0,%%xmm0 \n"
 "movd %%xmm0,%k5 \n"
 "pshufd $0x39,%%xmm0,%%xmm0 \n"
-MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
-MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
+"movd 0x00(%0,%1,1),%%xmm1 \n"
+"movd 0x00(%0,%5,1),%%xmm6 \n"
 "punpckldq %%xmm6,%%xmm1 \n"
 "addps %%xmm4,%%xmm2 \n"
 "movq %%xmm1,(%2) \n"
 "movd %%xmm0,%k1 \n"
 "pshufd $0x39,%%xmm0,%%xmm0 \n"
 "movd %%xmm0,%k5 \n"
-MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
-MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
+"movd 0x00(%0,%1,1),%%xmm0 \n"
+"movd 0x00(%0,%5,1),%%xmm6 \n"
 "punpckldq %%xmm6,%%xmm0 \n"
 "addps %%xmm4,%%xmm3 \n"
 "movq %%xmm0,0x08(%2) \n"
@@ -5544,7 +5544,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb,
 "pmaddwd %%xmm5,%%xmm0 \n"
 "addps %%xmm7,%%xmm2 \n"
 "movd %%xmm0,%k1 \n"
-MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
+"movd 0x00(%0,%1,1),%%xmm0 \n"
 "movd %%xmm0,(%2) \n"
 "lea 0x04(%2),%2 \n"
 "sub $0x1,%4 \n"
@@ -5592,7 +5592,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%1),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,1,4,1,xmm2)
+"movdqu 0x00(%1,%4,1),%%xmm2 \n"
 "movdqa %%xmm0,%%xmm1 \n"
 "punpcklbw %%xmm2,%%xmm0 \n"
 "punpckhbw %%xmm2,%%xmm1 \n"
@@ -5617,7 +5617,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr,
 LABELALIGN
 "50: \n"
 "movdqu (%1),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,1,4,1,xmm1)
+"movdqu 0x00(%1,%4,1),%%xmm1 \n"
 "pavgb %%xmm1,%%xmm0 \n"
 "movdqu %%xmm0,0x00(%1,%0,1) \n"
 "lea 0x10(%1),%1 \n"
@@ -5675,7 +5675,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr,
 LABELALIGN
 "1: \n"
 "vmovdqu (%1),%%ymm0 \n"
-MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
+"vmovdqu 0x00(%1,%4,1),%%ymm2 \n"
 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
 "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
@@ -5796,7 +5796,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
 LABELALIGN
 "1: \n"
 "movq (%1),%%xmm2 \n"
-MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
+"movq 0x00(%1,%2,1),%%xmm3 \n"
 "lea 0x8(%1),%1 \n"
 "punpcklbw %%xmm3,%%xmm2 \n"
 "movdqu (%0),%%xmm0 \n"
@@ -5832,7 +5832,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
 LABELALIGN
 "1: \n"
 "movq (%1),%%xmm2 \n"
-MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
+"movq 0x00(%1,%2,1),%%xmm3 \n"
 "lea 0x8(%1),%1 \n"
 "punpcklbw %%xmm3,%%xmm2 \n"
 "movdqu (%0),%%xmm0 \n"

--- a/source/scale_gcc.cc
+++ b/source/scale_gcc.cc
@@ -164,8 +164,8 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
-MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
+"movdqu 0x00(%0,%3,1),%%xmm2 \n"
+"movdqu 0x10(%0,%3,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pmaddubsw %%xmm4,%%xmm0 \n"
 "pmaddubsw %%xmm4,%%xmm1 \n"
@@ -267,8 +267,8 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
 "1: \n"
 "vmovdqu (%0),%%ymm0 \n"
 "vmovdqu 0x20(%0),%%ymm1 \n"
-MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
-MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
+"vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+"vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
 "lea 0x40(%0),%0 \n"
 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
@@ -345,22 +345,22 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
-MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+"movdqu 0x00(%0,%4,1),%%xmm2 \n"
+"movdqu 0x10(%0,%4,1),%%xmm3 \n"
 "pmaddubsw %%xmm4,%%xmm0 \n"
 "pmaddubsw %%xmm4,%%xmm1 \n"
 "pmaddubsw %%xmm4,%%xmm2 \n"
 "pmaddubsw %%xmm4,%%xmm3 \n"
 "paddw %%xmm2,%%xmm0 \n"
 "paddw %%xmm3,%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
-MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
+"movdqu 0x00(%0,%4,2),%%xmm2 \n"
+"movdqu 0x10(%0,%4,2),%%xmm3 \n"
 "pmaddubsw %%xmm4,%%xmm2 \n"
 "pmaddubsw %%xmm4,%%xmm3 \n"
 "paddw %%xmm2,%%xmm0 \n"
 "paddw %%xmm3,%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
-MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
+"movdqu 0x00(%0,%3,1),%%xmm2 \n"
+"movdqu 0x10(%0,%3,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pmaddubsw %%xmm4,%%xmm2 \n"
 "pmaddubsw %%xmm4,%%xmm3 \n"
@@ -432,22 +432,22 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
 "1: \n"
 "vmovdqu (%0),%%ymm0 \n"
 "vmovdqu 0x20(%0),%%ymm1 \n"
-MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
-MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
+"vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+"vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
-MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2
-MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
+"vmovdqu 0x00(%0,%3,2),%%ymm2 \n"
+"vmovdqu 0x20(%0,%3,2),%%ymm3 \n"
 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
-MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
-MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
+"vmovdqu 0x00(%0,%4,1),%%ymm2 \n"
+"vmovdqu 0x20(%0,%4,1),%%ymm3 \n"
 "lea 0x40(%0),%0 \n"
 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
@@ -539,7 +539,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7
+"movdqu 0x00(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm2,%%xmm6 \n"
 "pmaddubsw %%xmm5,%%xmm6 \n"
@@ -548,7 +548,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,(%1) \n"
 "movdqu 0x8(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7
+"movdqu 0x8(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm3,%%xmm6 \n"
 "pmaddubsw %%xmm0,%%xmm6 \n"
@@ -557,7 +557,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,0x8(%1) \n"
 "movdqu 0x10(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
+"movdqu 0x10(%0,%3,1),%%xmm7 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm4,%%xmm6 \n"
@@ -606,7 +606,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7
+"movdqu 0x00(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm6,%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm2,%%xmm6 \n"
@@ -616,7 +616,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,(%1) \n"
 "movdqu 0x8(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7
+"movdqu 0x8(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm6,%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm3,%%xmm6 \n"
@@ -626,7 +626,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,0x8(%1) \n"
 "movdqu 0x10(%0),%%xmm6 \n"
-MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
+"movdqu 0x10(%0,%3,1),%%xmm7 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm6,%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
@@ -700,7 +700,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
+"movdqu 0x00(%0,%3,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "pavgb %%xmm1,%%xmm0 \n"
 "movdqa %%xmm0,%%xmm1 \n"
@@ -745,7 +745,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6
+"movdqu 0x00(%0,%3,1),%%xmm6 \n"
 "movhlps %%xmm0,%%xmm1 \n"
 "movhlps %%xmm6,%%xmm7 \n"
 "punpcklbw %%xmm5,%%xmm0 \n"
@@ -754,7 +754,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 "punpcklbw %%xmm5,%%xmm7 \n"
 "paddusw %%xmm6,%%xmm0 \n"
 "paddusw %%xmm7,%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6
+"movdqu 0x00(%0,%3,2),%%xmm6 \n"
 "lea 0x10(%0),%0 \n"
 "movhlps %%xmm6,%%xmm7 \n"
 "punpcklbw %%xmm5,%%xmm6 \n"
@@ -1042,8 +1042,8 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
-MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
-MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
+"movdqu 0x00(%0,%3,1),%%xmm2 \n"
+"movdqu 0x10(%0,%3,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -1080,10 +1080,10 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
 LABELALIGN
 "1: \n"
 "movd (%0),%%xmm0 \n"
-MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
+"movd 0x00(%0,%1,1),%%xmm1 \n"
 "punpckldq %%xmm1,%%xmm0 \n"
-MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
-MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
+"movd 0x00(%0,%1,2),%%xmm2 \n"
+"movd 0x00(%0,%4,1),%%xmm3 \n"
 "lea 0x00(%0,%1,4),%0 \n"
 "punpckldq %%xmm3,%%xmm2 \n"
 "punpcklqdq %%xmm2,%%xmm0 \n"
@@ -1119,14 +1119,14 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 LABELALIGN
 "1: \n"
 "movq (%0),%%xmm0 \n"
-MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
-MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
-MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
+"movhps 0x00(%0,%1,1),%%xmm0 \n"
+"movq 0x00(%0,%1,2),%%xmm1 \n"
+"movhps 0x00(%0,%4,1),%%xmm1 \n"
 "lea 0x00(%0,%1,4),%0 \n"
 "movq (%5),%%xmm2 \n"
-MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
-MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
-MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
+"movhps 0x00(%5,%1,1),%%xmm2 \n"
+"movq 0x00(%5,%1,2),%%xmm3 \n"
+"movhps 0x00(%5,%4,1),%%xmm3 \n"
 "lea 0x00(%5,%1,4),%5 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -1175,14 +1175,14 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
 LABELALIGN
 "40: \n"
-MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
-MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
+"movd 0x00(%3,%0,4),%%xmm0 \n"
+"movd 0x00(%3,%1,4),%%xmm1 \n"
 "pextrw $0x5,%%xmm2,%k0 \n"
 "pextrw $0x7,%%xmm2,%k1 \n"
 "paddd %%xmm3,%%xmm2 \n"
 "punpckldq %%xmm1,%%xmm0 \n"
-MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1
-MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4
+"movd 0x00(%3,%0,4),%%xmm1 \n"
+"movd 0x00(%3,%1,4),%%xmm4 \n"
 "pextrw $0x1,%%xmm2,%k0 \n"
 "pextrw $0x3,%%xmm2,%k1 \n"
 "punpckldq %%xmm4,%%xmm1 \n"
@@ -1195,8 +1195,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
 "49: \n"
 "test $0x2,%4 \n"
 "je 29f \n"
-MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
-MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
+"movd 0x00(%3,%0,4),%%xmm0 \n"
+"movd 0x00(%3,%1,4),%%xmm1 \n"
 "pextrw $0x5,%%xmm2,%k0 \n"
 "punpckldq %%xmm1,%%xmm0 \n"
 "movq %%xmm0,(%2) \n"
@@ -1204,7 +1204,7 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
 "29: \n"
 "test $0x1,%4 \n"
 "je 99f \n"
-MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
+"movd 0x00(%3,%0,4),%%xmm0 \n"
 "movd %%xmm0,(%2) \n"
 "99: \n"
 : "=&a"(x0), // %0
@@ -1295,9 +1295,9 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
 "2: \n"
 "movdqa %%xmm2,%%xmm1 \n"
 "paddd %%xmm3,%%xmm2 \n"
-MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
+"movq 0x00(%1,%3,4),%%xmm0 \n"
 "psrlw $0x9,%%xmm1 \n"
-MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
+"movhps 0x00(%1,%4,4),%%xmm0 \n"
 "pshufb %%xmm5,%%xmm1 \n"
 "pshufb %%xmm4,%%xmm0 \n"
 "pxor %%xmm6,%%xmm1 \n"
@@ -1316,7 +1316,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
 "add $0x1,%2 \n"
 "jl 99f \n"
 "psrlw $0x9,%%xmm2 \n"
-MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
+"movq 0x00(%1,%3,4),%%xmm0 \n"
 "pshufb %%xmm5,%%xmm2 \n"
 "pshufb %%xmm4,%%xmm0 \n"
 "pxor %%xmm6,%%xmm2 \n"