mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-12 06:29:57 +08:00
AffineRow for GCC.
BUG=62 TEST=planar_unittest Review URL: https://webrtc-codereview.appspot.com/733004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@317 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
2a95465795
commit
7344440fb2
2
DEPS
2
DEPS
@ -27,7 +27,7 @@ deps = {
|
||||
# Dependencies used by libjpeg-turbo
|
||||
# Optional jpeg decoder
|
||||
"trunk/third_party/libjpeg_turbo/":
|
||||
Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@119959",
|
||||
Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@149334",
|
||||
|
||||
# Yasm assembler required for libjpeg_turbo
|
||||
"trunk/third_party/yasm/":
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 315
|
||||
Version: 316
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -228,11 +228,17 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, int interpolation);
|
||||
|
||||
#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
|
||||
defined(TARGET_IPHONE_SIMULATOR)
|
||||
#define YUV_DISABLE_ASM
|
||||
#endif
|
||||
// Row functions for copying pixels from a source with a slope to a row
|
||||
// of destination. Useful for scaling, rotation, mirror, texture mapping.
|
||||
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
|
||||
uint8* dst_argb, const float* uv_dudv, int width);
|
||||
#if defined(_MSC_VER)
|
||||
// The following are available on all x86 platforms:
|
||||
#if !defined(YUV_DISABLE_ASM) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||
uint8* dst_argb, const float* uv_dudv, int width);
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 315
|
||||
#define LIBYUV_VERSION 316
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -79,6 +79,7 @@ extern "C" {
|
||||
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
|
||||
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
|
||||
#define HAS_ARGBSHADE_SSE2
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are Windows only:
|
||||
@ -87,7 +88,6 @@ extern "C" {
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_NV12TOARGBROW_SSSE3
|
||||
#define HAS_NV21TOARGBROW_SSSE3
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are disabled when SSSE3 is available:
|
||||
|
||||
@ -3219,6 +3219,82 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
|
||||
}
|
||||
#endif // HAS_ARGBSHADE_SSE2
|
||||
|
||||
#ifdef HAS_ARGBAFFINEROW_SSE2
|
||||
// Copy ARGB pixels from source image with slope to a row of destination.
|
||||
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||
uint8* dst_argb, const float* uv_dudv, int width) {
|
||||
intptr_t src_argb_stride_temp = src_argb_stride;
|
||||
asm volatile (
|
||||
"movq (%3),%%xmm2 \n"
|
||||
"movq 0x8(%3),%%xmm3 \n"
|
||||
"shl $0x10,%1 \n"
|
||||
"add $0x4,%1 \n"
|
||||
"movd %1,%%xmm4 \n"
|
||||
"xor %1,%1 \n" // cleanse upper bits.
|
||||
"sub $0x2,%4 \n"
|
||||
"jl 29f \n"
|
||||
"movdqa %%xmm2,%%xmm0 \n"
|
||||
"addps %%xmm3,%%xmm0 \n"
|
||||
"movlhps %%xmm0,%%xmm2 \n"
|
||||
"pshufd $0x0,%%xmm4,%%xmm4 \n"
|
||||
"movlhps %%xmm3,%%xmm3 \n"
|
||||
"addps %%xmm3,%%xmm3 \n"
|
||||
"pshufd $0x0,%%xmm4,%%xmm4 \n"
|
||||
|
||||
// 2 pixel loop
|
||||
".p2align 2 \n"
|
||||
"20: \n"
|
||||
"cvttps2dq %%xmm2,%%xmm1 \n"
|
||||
"packssdw %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm4,%%xmm1 \n"
|
||||
"addps %%xmm3,%%xmm2 \n"
|
||||
"movd %%xmm1,%1 \n"
|
||||
"and $0x0fffffff,%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"pshufd $0x55,%%xmm5,%%xmm5 \n"
|
||||
"movd (%0,%1,1),%%xmm0 \n"
|
||||
"movd %%xmm5,%1 \n"
|
||||
"and $0x0fffffff,%1 \n"
|
||||
"movd (%0,%1,1),%%xmm5 \n"
|
||||
"punpckldq %%xmm5,%%xmm0 \n"
|
||||
"sub $0x2,%4 \n"
|
||||
"movq %%xmm0,(%2) \n"
|
||||
"lea 0x8(%2),%2 \n"
|
||||
"jge 20b \n"
|
||||
|
||||
"29: \n"
|
||||
"add $0x1,%4 \n"
|
||||
"jl 19f \n"
|
||||
|
||||
// 1 pixel loop
|
||||
".p2align 2 \n"
|
||||
"10: \n"
|
||||
"cvttps2dq %%xmm2,%%xmm1 \n"
|
||||
"packssdw %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm4,%%xmm1 \n"
|
||||
"addps %%xmm3,%%xmm2 \n"
|
||||
"movd %%xmm1,%1 \n"
|
||||
"and $0x0fffffff,%1 \n"
|
||||
"movd (%0,%1,1),%%xmm0 \n"
|
||||
"sub $0x1,%4 \n"
|
||||
"movd %%xmm0,(%2) \n"
|
||||
"lea 0x4(%2),%2 \n"
|
||||
"jge 10b \n"
|
||||
"19: \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(src_argb_stride_temp), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(uv_dudv), // %3
|
||||
"+rm"(width) // %4
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBAFFINEROW_SSE2
|
||||
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user