From ecab5430c217d8a26f643a5884352c1deb235669 Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Fri, 12 Jan 2018 17:44:21 -0800
Subject: [PATCH] Remove MEMOPREG x64 NaCL macros

MEMOPREG macros are deprecated in row.h

Regular expressions to remove MEMOPREG macros:

MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
MEMOPREG\((.*), (.*), (.*), (.*), (.*), (.*)\)
"\1 \2(%\3,%\4,\5),%%\6 \\n"

MEMOPREG(movdqu,0x00,1,4,1,xmm2)
MEMOPREG\((.*),(.*),(.*),(.*),(.*),(.*)\)
"\1 \2(%\3,%\4,\5),%%\6 \\n"

MEMOPREG(movdqu,0x00,1,4,1,xmm2)
MEMOPREG\((.*),(.*),(.*),(.*),(.*),(.*)\)(.*)(//.*)
"\1 \2(%\3,%\4,\5),%%\6 \\n"

TBR=braveyao@chromium.org
Bug: libyuv:702
Test: try bots pass
Change-Id: If8743abd9af2e8c549d0c7d3d49733a9b0f0ca86
Reviewed-on: https://chromium-review.googlesource.com/865964
Reviewed-by: Frank Barchard
Commit-Queue: Frank Barchard
---
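Each block above is a sample input, the find pattern, and the replacement.
As a minimal, illustrative sketch of the second pattern (std::regex
ECMAScript syntax, so $N stands in for \N; not necessarily the tool that
was actually used for the rewrite):

  #include <iostream>
  #include <regex>
  #include <string>

  int main() {
    // Second pattern from the message above (no spaces after commas).
    const std::regex memopreg(
        R"(MEMOPREG\((.*),(.*),(.*),(.*),(.*),(.*)\))");
    const std::string replacement = R"("$1 $2(%$3,%$4,$5),%%$6 \n")";
    std::string line = "MEMOPREG(movdqu,0x00,1,4,1,xmm2)";
    // Prints: "movdqu 0x00(%1,%4,1),%%xmm2 \n"
    std::cout << std::regex_replace(line, memopreg, replacement) << "\n";
  }

The first and third patterns differ only in tolerating spaces after the
commas and a trailing // comment on the macro line.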
 README.chromium          |   2 +-
 include/libyuv/row.h     |   5 --
 include/libyuv/version.h |   2 +-
 source/row_gcc.cc        | 172 +++++++++++++++++++--------------------
 source/scale_gcc.cc      |  92 ++++++++++-----------
 5 files changed, 134 insertions(+), 139 deletions(-)

diff --git a/README.chromium b/README.chromium
index 373e5f75c..77ab37e82 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1688
+Version: 1689
 License: BSD
 License File: LICENSE
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 89addc5ae..28ecc6726 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -169,7 +169,6 @@ extern "C" {
 // The following are available on all x86 platforms, but
 // require VS2012, clang 3.4 or gcc 4.7.
-// The code supports NaCL but requires a new compiler and validator.
 #if !defined(LIBYUV_DISABLE_X86) && \
     (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
     defined(GCC_HAS_AVX2))
@@ -575,10 +574,6 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
 #define LABELALIGN
 #endif
-// NaCL macros for GCC x64 - deprecated.
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
-  #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
-
 // Intel Code Analizer markers. Insert IACA_START IACA_END around code to be
 // measured and then run with iaca -64 libyuv_unittest.
 // IACA_ASM_START amd IACA_ASM_END are equivalents that can be used within
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 97595e58f..b191add97 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1688
+#define LIBYUV_VERSION 1689
 #endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 97fddc2c5..df6a8c1e3 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -987,16 +987,16 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1187,16 +1187,16 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1349,16 +1349,16 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1484,16 +1484,16 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1549,16 +1549,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "lea 0x40(%0),%0 \n"
@@ -1605,7 +1605,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // Read 8 UV from 444
 #define READYUV444 \
 "movq (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "punpcklbw %%xmm1,%%xmm0 \n" \
 "movq (%[y_buf]),%%xmm4 \n" \
@@ -1615,7 +1615,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // Read 4 UV from 422, upsample to 8 UV
 #define READYUV422 \
 "movd (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x4(%[u_buf]),%[u_buf] \n" \
 "punpcklbw %%xmm1,%%xmm0 \n" \
 "punpcklwd %%xmm0,%%xmm0 \n" \
@@ -1629,7 +1629,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
 #define READYUV210 \
 "movq (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "punpcklwd %%xmm1,%%xmm0 \n" \
 "psraw $0x2,%%xmm0 \n" \
@@ -1642,7 +1642,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
 // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
 #define READYUVA422 \
 "movd (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x4(%[u_buf]),%[u_buf] \n" \
 "punpcklbw %%xmm1,%%xmm0 \n" \
 "punpcklwd %%xmm0,%%xmm0 \n" \
@@ -2093,7 +2093,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // Read 16 UV from 444
 #define READYUV444_AVX2 \
 "vmovdqu (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x10(%[u_buf]),%[u_buf] \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
@@ -2106,7 +2106,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // Read 8 UV from 422, upsample to 16 UV.
 #define READYUV422_AVX2 \
 "vmovq (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
@@ -2121,7 +2121,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1.
 #define READYUV210_AVX2 \
 "vmovdqu (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x10(%[u_buf]),%[u_buf] \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
@@ -2136,7 +2136,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
 // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
 #define READYUVA422_AVX2 \
 "vmovq (%[u_buf]),%%xmm0 \n" \
- MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
+ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
 "lea 0x8(%[u_buf]),%[u_buf] \n" \
 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
@@ -2683,7 +2683,7 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 LABELALIGN
 "1: \n"
- MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
+ "movdqu -0x10(%0,%2,1),%%xmm0 \n"
 "pshufb %%xmm5,%%xmm0 \n"
 "movdqu %%xmm0,(%1) \n"
 "lea 0x10(%1),%1 \n"
@@ -2707,7 +2707,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
 LABELALIGN
 "1: \n"
- MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0
+ "vmovdqu -0x20(%0,%2,1),%%ymm0 \n"
 "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
 "vpermq $0x4e,%%ymm0,%%ymm0 \n"
 "vmovdqu %%ymm0,(%1) \n"
@@ -2902,7 +2902,7 @@ void MergeUVRow_AVX2(const uint8* src_u,
 LABELALIGN
 "1: \n"
 "vmovdqu (%0),%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
+ "vmovdqu 0x00(%0,%1,1),%%ymm1 \n"
 "lea 0x20(%0),%0 \n"
 "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
 "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
@@ -2936,7 +2936,7 @@ void MergeUVRow_SSE2(const uint8* src_u,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "movdqa %%xmm0,%%xmm2 \n"
 "punpcklbw %%xmm1,%%xmm0 \n"
@@ -3735,8 +3735,8 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -3837,8 +3837,8 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -4429,9 +4429,9 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
 "movdqu (%0),%%xmm0 \n"
 "movzb 0x03(%0),%3 \n"
 "punpcklbw %%xmm0,%%xmm0 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
+ "movd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x07(%0),%3 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
+ "movd 0x00(%4,%3,4),%%xmm3 \n"
 "pshuflw $0x40,%%xmm2,%%xmm2 \n"
 "pshuflw $0x40,%%xmm3,%%xmm3 \n"
 "movlhps %%xmm3,%%xmm2 \n"
@@ -4439,9 +4439,9 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb,
 "movdqu (%0),%%xmm1 \n"
 "movzb 0x0b(%0),%3 \n"
 "punpckhbw %%xmm1,%%xmm1 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
+ "movd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x0f(%0),%3 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
+ "movd 0x00(%4,%3,4),%%xmm3 \n"
 "pshuflw $0x40,%%xmm2,%%xmm2 \n"
 "pshuflw $0x40,%%xmm3,%%xmm3 \n"
 "movlhps %%xmm3,%%xmm2 \n"
@@ -4481,24 +4481,24 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb,
 "1: \n"
 // replace VPGATHER
 "movzb 0x03(%0),%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
+ "vmovd 0x00(%4,%3,4),%%xmm0 \n"
 "movzb 0x07(%0),%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
+ "vmovd 0x00(%4,%3,4),%%xmm1 \n"
 "movzb 0x0b(%0),%3 \n"
 "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
+ "vmovd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x0f(%0),%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
+ "vmovd 0x00(%4,%3,4),%%xmm3 \n"
 "movzb 0x13(%0),%3 \n"
 "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
+ "vmovd 0x00(%4,%3,4),%%xmm0 \n"
 "movzb 0x17(%0),%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
+ "vmovd 0x00(%4,%3,4),%%xmm1 \n"
 "movzb 0x1b(%0),%3 \n"
 "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
+ "vmovd 0x00(%4,%3,4),%%xmm2 \n"
 "movzb 0x1f(%0),%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
+ "vmovd 0x00(%4,%3,4),%%xmm3 \n"
 "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n"
 "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n"
 "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n"
@@ -5033,13 +5033,13 @@ void SobelXRow_SSE2(const uint8* src_y0,
 "punpcklbw %%xmm5,%%xmm0 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "psubw %%xmm1,%%xmm0 \n"
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
+ "movq 0x00(%0,%1,1),%%xmm1 \n"
+ "movq 0x02(%0,%1,1),%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "psubw %%xmm2,%%xmm1 \n"
- MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
- MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
+ "movq 0x00(%0,%2,1),%%xmm2 \n"
+ "movq 0x02(%0,%2,1),%%xmm3 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm3 \n"
 "psubw %%xmm3,%%xmm2 \n"
@@ -5084,17 +5084,17 @@ void SobelYRow_SSE2(const uint8* src_y0,
 LABELALIGN
 "1: \n"
 "movq (%0),%%xmm0 \n"
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
+ "movq 0x00(%0,%1,1),%%xmm1 \n"
 "punpcklbw %%xmm5,%%xmm0 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "psubw %%xmm1,%%xmm0 \n"
 "movq 0x1(%0),%%xmm1 \n"
- MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
+ "movq 0x01(%0,%1,1),%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm1 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "psubw %%xmm2,%%xmm1 \n"
 "movq 0x2(%0),%%xmm2 \n"
- MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
+ "movq 0x02(%0,%1,1),%%xmm3 \n"
 "punpcklbw %%xmm5,%%xmm2 \n"
 "punpcklbw %%xmm5,%%xmm3 \n"
 "psubw %%xmm3,%%xmm2 \n"
@@ -5139,7 +5139,7 @@ void SobelRow_SSE2(const uint8* src_sobelx,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "paddusb %%xmm1,%%xmm0 \n"
 "movdqa %%xmm0,%%xmm2 \n"
@@ -5188,7 +5188,7 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "paddusb %%xmm1,%%xmm0 \n"
 "movdqu %%xmm0,(%2) \n"
@@ -5224,7 +5224,7 @@ void SobelXYRow_SSE2(const uint8* src_sobelx,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "movdqa %%xmm0,%%xmm2 \n"
 "paddusb %%xmm1,%%xmm2 \n"
@@ -5373,19 +5373,19 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft,
 "movdqu 0x10(%0),%%xmm1 \n"
 "movdqu 0x20(%0),%%xmm2 \n"
 "movdqu 0x30(%0),%%xmm3 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
"psubd 0x10(%0,%4,4),%%xmm1 \n" + "psubd 0x20(%0,%4,4),%%xmm2 \n" + "psubd 0x30(%0,%4,4),%%xmm3 \n" "lea 0x40(%0),%0 \n" "psubd (%1),%%xmm0 \n" "psubd 0x10(%1),%%xmm1 \n" "psubd 0x20(%1),%%xmm2 \n" "psubd 0x30(%1),%%xmm3 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "paddd 0x10(%1,%4,4),%%xmm1 \n" + "paddd 0x20(%1,%4,4),%%xmm2 \n" + "paddd 0x30(%1,%4,4),%%xmm3 \n" "lea 0x40(%1),%1 \n" "packssdw %%xmm1,%%xmm0 \n" "packssdw %%xmm3,%%xmm2 \n" @@ -5405,19 +5405,19 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "psubd 0x10(%0,%4,4),%%xmm1 \n" + "psubd 0x20(%0,%4,4),%%xmm2 \n" + "psubd 0x30(%0,%4,4),%%xmm3 \n" "lea 0x40(%0),%0 \n" "psubd (%1),%%xmm0 \n" "psubd 0x10(%1),%%xmm1 \n" "psubd 0x20(%1),%%xmm2 \n" "psubd 0x30(%1),%%xmm3 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "paddd 0x10(%1,%4,4),%%xmm1 \n" + "paddd 0x20(%1,%4,4),%%xmm2 \n" + "paddd 0x30(%1,%4,4),%%xmm3 \n" "lea 0x40(%1),%1 \n" "cvtdq2ps %%xmm0,%%xmm0 \n" "cvtdq2ps %%xmm1,%%xmm1 \n" @@ -5447,10 +5447,10 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, LABELALIGN "10: \n" "movdqu (%0),%%xmm0 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 + "psubd 0x00(%0,%4,4),%%xmm0 \n" "lea 0x10(%0),%0 \n" "psubd (%1),%%xmm0 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 + "paddd 0x00(%1,%4,4),%%xmm0 \n" "lea 0x10(%1),%1 \n" "cvtdq2ps %%xmm0,%%xmm0 \n" "mulps %%xmm4,%%xmm0 \n" @@ -5515,16 +5515,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, "pshufd $0x39,%%xmm0,%%xmm0 \n" "movd %%xmm0,%k5 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 + "movd 0x00(%0,%1,1),%%xmm1 \n" + "movd 0x00(%0,%5,1),%%xmm6 \n" "punpckldq %%xmm6,%%xmm1 \n" "addps %%xmm4,%%xmm2 \n" "movq %%xmm1,(%2) \n" "movd %%xmm0,%k1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movd %%xmm0,%k5 \n" - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 + "movd 0x00(%0,%1,1),%%xmm0 \n" + "movd 0x00(%0,%5,1),%%xmm6 \n" "punpckldq %%xmm6,%%xmm0 \n" "addps %%xmm4,%%xmm3 \n" "movq %%xmm0,0x08(%2) \n" @@ -5544,7 +5544,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, "pmaddwd %%xmm5,%%xmm0 \n" "addps %%xmm7,%%xmm2 \n" "movd %%xmm0,%k1 \n" - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 + "movd 0x00(%0,%1,1),%%xmm0 \n" "movd %%xmm0,(%2) \n" "lea 0x04(%2),%2 \n" "sub $0x1,%4 \n" @@ -5592,7 +5592,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, LABELALIGN "1: \n" "movdqu (%1),%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm2) + "movdqu 0x00(%1,%4,1),%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw 
 "punpckhbw %%xmm2,%%xmm1 \n"
@@ -5617,7 +5617,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr,
 LABELALIGN
 "50: \n"
 "movdqu (%1),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
+ "movdqu 0x00(%1,%4,1),%%xmm1 \n"
 "pavgb %%xmm1,%%xmm0 \n"
 "movdqu %%xmm0,0x00(%1,%0,1) \n"
 "lea 0x10(%1),%1 \n"
@@ -5675,7 +5675,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr,
 LABELALIGN
 "1: \n"
 "vmovdqu (%1),%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
+ "vmovdqu 0x00(%1,%4,1),%%ymm2 \n"
 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
 "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
@@ -5796,7 +5796,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
 LABELALIGN
 "1: \n"
 "movq (%1),%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
+ "movq 0x00(%1,%2,1),%%xmm3 \n"
 "lea 0x8(%1),%1 \n"
 "punpcklbw %%xmm3,%%xmm2 \n"
 "movdqu (%0),%%xmm0 \n"
@@ -5832,7 +5832,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
 LABELALIGN
 "1: \n"
 "movq (%1),%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
+ "movq 0x00(%1,%2,1),%%xmm3 \n"
 "lea 0x8(%1),%1 \n"
 "punpcklbw %%xmm3,%%xmm2 \n"
 "movdqu (%0),%%xmm0 \n"
diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc
index eb8184cf4..047131f16 100644
--- a/source/scale_gcc.cc
+++ b/source/scale_gcc.cc
@@ -164,8 +164,8 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pmaddubsw %%xmm4,%%xmm0 \n"
 "pmaddubsw %%xmm4,%%xmm1 \n"
@@ -267,8 +267,8 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
 "1: \n"
 "vmovdqu (%0),%%ymm0 \n"
 "vmovdqu 0x20(%0),%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
+ "vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
 "lea 0x40(%0),%0 \n"
 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
@@ -345,22 +345,22 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
 "pmaddubsw %%xmm4,%%xmm0 \n"
 "pmaddubsw %%xmm4,%%xmm1 \n"
 "pmaddubsw %%xmm4,%%xmm2 \n"
 "pmaddubsw %%xmm4,%%xmm3 \n"
 "paddw %%xmm2,%%xmm0 \n"
 "paddw %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
+ "movdqu 0x00(%0,%4,2),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,2),%%xmm3 \n"
 "pmaddubsw %%xmm4,%%xmm2 \n"
 "pmaddubsw %%xmm4,%%xmm3 \n"
 "paddw %%xmm2,%%xmm0 \n"
 "paddw %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pmaddubsw %%xmm4,%%xmm2 \n"
 "pmaddubsw %%xmm4,%%xmm3 \n"
@@ -432,22 +432,22 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
 "1: \n"
 "vmovdqu (%0),%%ymm0 \n"
 "vmovdqu 0x20(%0),%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
+ "vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
+ "vmovdqu 0x00(%0,%3,2),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,2),%%ymm3 \n"
 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
- MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
+ "vmovdqu 0x00(%0,%4,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%4,1),%%ymm3 \n"
 "lea 0x40(%0),%0 \n"
 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
@@ -539,7 +539,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7
+ "movdqu 0x00(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm2,%%xmm6 \n"
 "pmaddubsw %%xmm5,%%xmm6 \n"
@@ -548,7 +548,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,(%1) \n"
 "movdqu 0x8(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7
+ "movdqu 0x8(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm3,%%xmm6 \n"
 "pmaddubsw %%xmm0,%%xmm6 \n"
@@ -557,7 +557,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,0x8(%1) \n"
 "movdqu 0x10(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
+ "movdqu 0x10(%0,%3,1),%%xmm7 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm4,%%xmm6 \n"
@@ -606,7 +606,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7
+ "movdqu 0x00(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm6,%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm2,%%xmm6 \n"
@@ -616,7 +616,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,(%1) \n"
 "movdqu 0x8(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7
+ "movdqu 0x8(%0,%3,1),%%xmm7 \n"
 "pavgb %%xmm6,%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
 "pshufb %%xmm3,%%xmm6 \n"
@@ -626,7 +626,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 "packuswb %%xmm6,%%xmm6 \n"
 "movq %%xmm6,0x8(%1) \n"
 "movdqu 0x10(%0),%%xmm6 \n"
- MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
+ "movdqu 0x10(%0,%3,1),%%xmm7 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm6,%%xmm7 \n"
 "pavgb %%xmm7,%%xmm6 \n"
@@ -700,7 +700,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
+ "movdqu 0x00(%0,%3,1),%%xmm1 \n"
 "lea 0x10(%0),%0 \n"
 "pavgb %%xmm1,%%xmm0 \n"
 "movdqa %%xmm0,%%xmm1 \n"
@@ -745,7 +745,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 LABELALIGN
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6
+ "movdqu 0x00(%0,%3,1),%%xmm6 \n"
 "movhlps %%xmm0,%%xmm1 \n"
 "movhlps %%xmm6,%%xmm7 \n"
 "punpcklbw %%xmm5,%%xmm0 \n"
@@ -754,7 +754,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 "punpcklbw %%xmm5,%%xmm7 \n"
 "paddusw %%xmm6,%%xmm0 \n"
 "paddusw %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6
+ "movdqu 0x00(%0,%3,2),%%xmm6 \n"
 "lea 0x10(%0),%0 \n"
 "movhlps %%xmm6,%%xmm7 \n"
 "punpcklbw %%xmm5,%%xmm6 \n"
@@ -1042,8 +1042,8 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
 "1: \n"
 "movdqu (%0),%%xmm0 \n"
 "movdqu 0x10(%0),%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
 "lea 0x20(%0),%0 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -1080,10 +1080,10 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
 LABELALIGN
 "1: \n"
 "movd (%0),%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
+ "movd 0x00(%0,%1,1),%%xmm1 \n"
 "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
- MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
+ "movd 0x00(%0,%1,2),%%xmm2 \n"
+ "movd 0x00(%0,%4,1),%%xmm3 \n"
 "lea 0x00(%0,%1,4),%0 \n"
 "punpckldq %%xmm3,%%xmm2 \n"
 "punpcklqdq %%xmm2,%%xmm0 \n"
@@ -1119,14 +1119,14 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 LABELALIGN
 "1: \n"
 "movq (%0),%%xmm0 \n"
- MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
- MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
- MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
+ "movhps 0x00(%0,%1,1),%%xmm0 \n"
+ "movq 0x00(%0,%1,2),%%xmm1 \n"
+ "movhps 0x00(%0,%4,1),%%xmm1 \n"
 "lea 0x00(%0,%1,4),%0 \n"
 "movq (%5),%%xmm2 \n"
- MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
- MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
- MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
+ "movhps 0x00(%5,%1,1),%%xmm2 \n"
+ "movq 0x00(%5,%1,2),%%xmm3 \n"
+ "movhps 0x00(%5,%4,1),%%xmm3 \n"
 "lea 0x00(%5,%1,4),%5 \n"
 "pavgb %%xmm2,%%xmm0 \n"
 "pavgb %%xmm3,%%xmm1 \n"
@@ -1175,14 +1175,14 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
 LABELALIGN
 "40: \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd 0x00(%3,%1,4),%%xmm1 \n"
 "pextrw $0x5,%%xmm2,%k0 \n"
 "pextrw $0x7,%%xmm2,%k1 \n"
 "paddd %%xmm3,%%xmm2 \n"
 "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1
- MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4
+ "movd 0x00(%3,%0,4),%%xmm1 \n"
+ "movd 0x00(%3,%1,4),%%xmm4 \n"
 "pextrw $0x1,%%xmm2,%k0 \n"
 "pextrw $0x3,%%xmm2,%k1 \n"
 "punpckldq %%xmm4,%%xmm1 \n"
@@ -1195,8 +1195,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
 "49: \n"
 "test $0x2,%4 \n"
 "je 29f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd 0x00(%3,%1,4),%%xmm1 \n"
 "pextrw $0x5,%%xmm2,%k0 \n"
 "punpckldq %%xmm1,%%xmm0 \n"
 "movq %%xmm0,(%2) \n"
@@ -1204,7 +1204,7 @@ void ScaleARGBCols_SSE2(uint8* dst_argb,
 "29: \n"
 "test $0x1,%4 \n"
 "je 99f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
 "movd %%xmm0,(%2) \n"
 "99: \n"
 : "=&a"(x0), // %0
@@ -1295,9 +1295,9 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
 "2: \n"
 "movdqa %%xmm2,%%xmm1 \n"
 "paddd %%xmm3,%%xmm2 \n"
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
+ "movq 0x00(%1,%3,4),%%xmm0 \n"
 "psrlw $0x9,%%xmm1 \n"
- MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
+ "movhps 0x00(%1,%4,4),%%xmm0 \n"
 "pshufb %%xmm5,%%xmm1 \n"
 "pshufb %%xmm4,%%xmm0 \n"
 "pxor %%xmm6,%%xmm1 \n"
@@ -1316,7 +1316,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
dst_argb, "add $0x1,%2 \n" "jl 99f \n" "psrlw $0x9,%%xmm2 \n" - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 + "movq 0x00(%1,%3,4),%%xmm0 \n" "pshufb %%xmm5,%%xmm2 \n" "pshufb %%xmm4,%%xmm0 \n" "pxor %%xmm6,%%xmm2 \n"