From d70293993fac8161bd48d86d74cd93ad6cad65a0 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 6 Oct 2015 16:54:26 -0700 Subject: [PATCH] port scale box filter sse2 to gcc TBR=harryjin@google.com BUG=libyuv:492 Review URL: https://codereview.chromium.org/1393653002 . --- README.chromium | 2 +- include/libyuv/scale_row.h | 6 +----- include/libyuv/version.h | 2 +- source/scale_gcc.cc | 42 ++++++++++++-------------------------- 4 files changed, 16 insertions(+), 36 deletions(-) diff --git a/README.chromium b/README.chromium index ff3f16627..54997b24a 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1499 +Version: 1500 License: BSD License File: LICENSE diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index e7594841b..30f3cdc57 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -53,6 +53,7 @@ extern "C" { #define HAS_SCALEROWDOWN34_SSSE3 #define HAS_SCALEROWDOWN38_SSSE3 #define HAS_SCALEROWDOWN4_SSE2 +#define HAS_SCALEADDROW_SSE2 #endif // The following are available for Visual C and clangcl 32 bit: @@ -63,11 +64,6 @@ extern "C" { #define HAS_SCALEROWDOWN4_AVX2 #endif -// The following are available on Visual C: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) -#define HAS_SCALEADDROW_SSE2 -#endif - // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 85fefc356..7a6c8fd72 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1499 +#define LIBYUV_VERSION 1500 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc index 43b68fa0a..47437b83e 100644 --- a/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -575,47 +575,31 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, } // Reads 16xN bytes and produces 16 shorts at a time. -void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) { - int tmp_height = 0; - intptr_t tmp_src = 0; +void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { asm volatile ( - "mov %0,%3 \n" // row pointer - "mov %5,%2 \n" // height - "pxor %%xmm0,%%xmm0 \n" // clear accumulators - "pxor %%xmm1,%%xmm1 \n" - "pxor %%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" - "movdqu " MEMACCESS(3) ",%%xmm2 \n" - "add %6,%3 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm4,%%xmm2 \n" - "punpckhbw %%xmm4,%%xmm3 \n" + "movdqu " MEMACCESS(0) ",%%xmm3 \n" + "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 + "movdqu " MEMACCESS(1) ",%%xmm0 \n" + "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n" + "movdqa %%xmm3,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" "paddusw %%xmm2,%%xmm0 \n" "paddusw %%xmm3,%%xmm1 \n" - "sub $0x1,%2 \n" - "jg 1b \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" "lea " MEMLEA(0x20,1) ",%1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 - "mov %0,%3 \n" // row pointer - "mov %5,%2 \n" // height - "pxor %%xmm0,%%xmm0 \n" // clear accumulators - "pxor %%xmm1,%%xmm1 \n" - "sub $0x10,%4 \n" + "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 - "+r"(tmp_height), // %2 - "+r"(tmp_src), // %3 - "+r"(src_width), // %4 - "+rm"(src_height) // %5 - : "rm"((intptr_t)(src_stride)) // %6 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" + "+r"(src_width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); }