From 7344440fb22b65ec4915b6084963816404edc088 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 9 Aug 2012 17:33:29 +0000 Subject: [PATCH] AffineRow for GCC. BUG=62 TEST=planar_unittest Review URL: https://webrtc-codereview.appspot.com/733004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@317 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- DEPS | 2 +- README.chromium | 2 +- include/libyuv/planar_functions.h | 8 +++- include/libyuv/version.h | 2 +- source/row.h | 2 +- source/row_posix.cc | 76 +++++++++++++++++++++++++++++++ 6 files changed, 87 insertions(+), 5 deletions(-) diff --git a/DEPS b/DEPS index 4c2263c5d..c0cfc5dfc 100644 --- a/DEPS +++ b/DEPS @@ -27,7 +27,7 @@ deps = { # Dependencies used by libjpeg-turbo # Optional jpeg decoder "trunk/third_party/libjpeg_turbo/": - Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@119959", + Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@149334", # Yasm assember required for libjpeg_turbo "trunk/third_party/yasm/": diff --git a/README.chromium b/README.chromium index 6b77771ec..20f3afdcb 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 315 +Version: 316 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 2b2f9c307..d411966a5 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -228,11 +228,17 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, uint8* dst_argb, int dst_stride_argb, int width, int height, int interpolation); +#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ + defined(TARGET_IPHONE_SIMULATOR) +#define YUV_DISABLE_ASM +#endif // Row functions for copying a pixels from a source with a slope to a row // of destination. Useful for scaling, rotation, mirror, texture mapping. void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); -#if defined(_MSC_VER) +// The following are available on all x86 platforms: +#if !defined(YUV_DISABLE_ASM) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); #define HAS_ARGBAFFINEROW_SSE2 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index ab79f711f..5220b1026 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 315 +#define LIBYUV_VERSION 316 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row.h b/source/row.h index c09320751..db5e7b44b 100644 --- a/source/row.h +++ b/source/row.h @@ -79,6 +79,7 @@ extern "C" { #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGE_SSE2 #define HAS_ARGBSHADE_SSE2 +#define HAS_ARGBAFFINEROW_SSE2 #endif // The following are Windows only: @@ -87,7 +88,6 @@ extern "C" { #define HAS_ARGBCOLORTABLEROW_X86 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 -#define HAS_ARGBAFFINEROW_SSE2 #endif // The following are disabled when SSSE3 is available: diff --git a/source/row_posix.cc b/source/row_posix.cc index 06aefb516..ee1dbc00a 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -3219,6 +3219,82 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, } #endif // HAS_ARGBSHADE_SSE2 +#ifdef HAS_ARGBAFFINEROW_SSE2 +// Copy ARGB pixels from source image with slope to a row of destination. +void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, + uint8* dst_argb, const float* uv_dudv, int width) { + intptr_t src_argb_stride_temp = src_argb_stride; + asm volatile ( + "movq (%3),%%xmm2 \n" + "movq 0x8(%3),%%xmm3 \n" + "shl $0x10,%1 \n" + "add $0x4,%1 \n" + "movd %1,%%xmm4 \n" + "xor %1,%1 \n" // cleanse upper bits. + "sub $0x2,%4 \n" + "jl 29f \n" + "movdqa %%xmm2,%%xmm0 \n" + "addps %%xmm3,%%xmm0 \n" + "movlhps %%xmm0,%%xmm2 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "movlhps %%xmm3,%%xmm3 \n" + "addps %%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + + // 2 pixel loop \n" + ".p2align 2 \n" + "20: \n" + "cvttps2dq %%xmm2,%%xmm1 \n" + "packssdw %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm4,%%xmm1 \n" + "addps %%xmm3,%%xmm2 \n" + "movd %%xmm1,%1 \n" + "and $0x0fffffff,%1 \n" + "movdqa %%xmm1,%%xmm5 \n" + "pshufd $0x55,%%xmm5,%%xmm5 \n" + "movd (%0,%1,1),%%xmm0 \n" + "movd %%xmm5,%1 \n" + "and $0x0fffffff,%1 \n" + "movd (%0,%1,1),%%xmm5 \n" + "punpckldq %%xmm5,%%xmm0 \n" + "sub $0x2,%4 \n" + "movq %%xmm0,(%2) \n" + "lea 0x8(%2),%2 \n" + "jge 20b \n" + + "29: \n" + "add $0x1,%4 \n" + "jl 19f \n" + + // 1 pixel loop \n" + ".p2align 2 \n" + "10: \n" + "cvttps2dq %%xmm2,%%xmm1 \n" + "packssdw %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm4,%%xmm1 \n" + "addps %%xmm3,%%xmm2 \n" + "movd %%xmm1,%1 \n" + "and $0x0fffffff,%1 \n" + "movd (%0,%1,1),%%xmm0 \n" + "sub $0x1,%4 \n" + "movd %%xmm0,(%2) \n" + "lea 0x4(%2),%2 \n" + "jge 10b \n" + "19: \n" + : "+r"(src_argb), // %0 + "+r"(src_argb_stride_temp), // %1 + "+r"(dst_argb), // %2 + "+r"(uv_dudv), // %3 + "+rm"(width) // %4 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" +#endif + ); +} +#endif // HAS_ARGBAFFINEROW_SSE2 + #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus