AffineRow for GCC.

BUG=62
TEST=planar_unittest
Review URL: https://webrtc-codereview.appspot.com/733004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@317 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2012-08-09 17:33:29 +00:00
parent 2a95465795
commit 7344440fb2
6 changed files with 87 additions and 5 deletions

2
DEPS
View File

@ -27,7 +27,7 @@ deps = {
# Dependencies used by libjpeg-turbo
# Optional jpeg decoder
"trunk/third_party/libjpeg_turbo/":
Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@119959",
Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@149334",
# Yasm assembler required for libjpeg_turbo
"trunk/third_party/yasm/":

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 315
Version: 316
License: BSD
License File: LICENSE

View File

@ -228,11 +228,17 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
#define YUV_DISABLE_ASM
#endif
// Row functions for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
// Writes `width` ARGB pixels to dst_argb.
// NOTE(review): uv_dudv is presumably 4 floats {u, v, du, dv} (starting
// source coordinate and per-pixel step), matching how ARGBAffineRow_SSE2
// reads it -- confirm against the C implementation.
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#if defined(_MSC_VER)
// The following are available on all x86 platforms:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#define HAS_ARGBAFFINEROW_SSE2

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 315
#define LIBYUV_VERSION 316
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -79,6 +79,7 @@ extern "C" {
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
#define HAS_ARGBSHADE_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#endif
// The following are Windows only:
@ -87,7 +88,6 @@ extern "C" {
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_ARGBAFFINEROW_SSE2
#endif
// The following are disabled when SSSE3 is available:

View File

@ -3219,6 +3219,82 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
}
#endif // HAS_ARGBSHADE_SSE2
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
//
// uv_dudv is read as four floats: {u, v} (uv_dudv[0..1]) is the source
// coordinate of the first destination pixel and {du, dv} (uv_dudv[2..3]) is
// the step added for each following pixel. For every destination pixel the
// coordinate is truncated to integers (x, y) and the 4-byte pixel at
// src_argb + x * 4 + y * src_argb_stride is copied to dst_argb.
//
// Pixels are produced two at a time while at least two remain, then a single
// trailing pixel is handled if width is odd.
//
// NOTE(review): x, y and the stride are multiplied as signed 16-bit values
// (packssdw + pmaddwd), so this presumably requires src_argb_stride and the
// coordinates to fit in 16 bits -- confirm with callers.
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width) {
// Pointer-sized copy of the stride so it can live in a full-width register
// that is later reused as the source byte offset (%1).
intptr_t src_argb_stride_temp = src_argb_stride;
asm volatile (
"movq (%3),%%xmm2 \n"  // xmm2 = {u, v, 0, 0}
"movq 0x8(%3),%%xmm3 \n"  // xmm3 = {du, dv, 0, 0}
"shl $0x10,%1 \n"  // stride moves into the high 16-bit word
"add $0x4,%1 \n"  // low word = 4 (bytes per ARGB pixel)
"movd %1,%%xmm4 \n"  // xmm4 low dword = word pair {4, stride}
"xor %1,%1 \n" // cleanse upper bits.
"sub $0x2,%4 \n"  // width -= 2
"jl 29f \n"  // fewer than 2 pixels: skip the 2 pixel loop
"movdqa %%xmm2,%%xmm0 \n"
"addps %%xmm3,%%xmm0 \n"  // xmm0 = {u + du, v + dv, ...}
"movlhps %%xmm0,%%xmm2 \n"  // xmm2 = {u, v, u + du, v + dv}
"pshufd $0x0,%%xmm4,%%xmm4 \n"  // broadcast {4, stride} to all 4 dwords
"movlhps %%xmm3,%%xmm3 \n"  // xmm3 = {du, dv, du, dv}
"addps %%xmm3,%%xmm3 \n"  // xmm3 = 2 * {du, dv, du, dv}: step for 2 pixels
// NOTE(review): this repeats the pshufd above; xmm4 is already broadcast, so
// it looks redundant.
"pshufd $0x0,%%xmm4,%%xmm4 \n"
// 2 pixel loop.
".p2align 2 \n"
"20: \n"
"cvttps2dq %%xmm2,%%xmm1 \n"  // truncate to int32 {x0, y0, x1, y1}
"packssdw %%xmm1,%%xmm1 \n"  // saturate to int16 words
"pmaddwd %%xmm4,%%xmm1 \n"  // dword i = x_i * 4 + y_i * stride
"addps %%xmm3,%%xmm2 \n"  // advance both coordinates by 2 steps
"movd %%xmm1,%1 \n"  // %1 = byte offset of pixel 0
// Keep only the low 28 bits of the offset (presumably to drop stray upper
// bits before it is used as an index -- TODO confirm intent).
"and $0x0fffffff,%1 \n"
"movdqa %%xmm1,%%xmm5 \n"
"pshufd $0x55,%%xmm5,%%xmm5 \n"  // xmm5 = byte offset of pixel 1
"movd (%0,%1,1),%%xmm0 \n"  // load source pixel 0
"movd %%xmm5,%1 \n"
"and $0x0fffffff,%1 \n"
"movd (%0,%1,1),%%xmm5 \n"  // load source pixel 1
"punpckldq %%xmm5,%%xmm0 \n"  // xmm0 low 64 bits = {pixel 0, pixel 1}
"sub $0x2,%4 \n"  // width -= 2; sets the flags tested by jge below
"movq %%xmm0,(%2) \n"  // store 2 destination pixels
"lea 0x8(%2),%2 \n"  // dst += 8; lea leaves the flags untouched
"jge 20b \n"  // loop while at least 2 more pixels remain
"29: \n"
"add $0x1,%4 \n"  // width is now 0 if one pixel remains, negative if none
"jl 19f \n"
// 1 pixel loop.
".p2align 2 \n"
"10: \n"
"cvttps2dq %%xmm2,%%xmm1 \n"  // only the low {x, y} pair is used here
"packssdw %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm4,%%xmm1 \n"  // low dword = x * 4 + y * stride
"addps %%xmm3,%%xmm2 \n"
"movd %%xmm1,%1 \n"
"and $0x0fffffff,%1 \n"
"movd (%0,%1,1),%%xmm0 \n"  // load the source pixel
"sub $0x1,%4 \n"  // sets the flags tested by jge below
"movd %%xmm0,(%2) \n"  // store 1 destination pixel
"lea 0x4(%2),%2 \n"  // dst += 4
"jge 10b \n"
"19: \n"
// All operands are read-write: the asm advances dst_argb, counts width down
// and reuses the stride register as scratch.
: "+r"(src_argb), // %0
"+r"(src_argb_stride_temp), // %1
"+r"(dst_argb), // %2
"+r"(uv_dudv), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
#endif // HAS_ARGBAFFINEROW_SSE2
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus