From 864f828a0167bde25b0d24d6b865aa514919fcc9 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 8 Aug 2012 18:10:15 +0000 Subject: [PATCH] ARGBAffineRow_SSE2 function to copy pixels from a source with slope to a row of destination. BUG=60 TEST=none Review URL: https://webrtc-codereview.appspot.com/727004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@313 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row.h | 3 ++ source/row_win.cc | 72 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 3 deletions(-) diff --git a/README.chromium b/README.chromium index f0763709f..863fc49e8 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 312 +Version: 313 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 499d6aa5c..7705637fd 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 312 +#define LIBYUV_VERSION 313 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row.h b/source/row.h index 6c4a0f959..e7fe86fd8 100644 --- a/source/row.h +++ b/source/row.h @@ -87,6 +87,7 @@ extern "C" { #define HAS_ARGBCOLORTABLEROW_X86 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 +#define HAS_ARGBAFFINEROW_SSE2 #endif // The following are disabled when SSSE3 is available: @@ -522,6 +523,8 @@ void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, uint32 value); void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, uint32 value); +void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, + uint8* dst_argb, const float* uv_dudv, int width); #ifdef __cplusplus } // extern "C" diff --git a/source/row_win.cc b/source/row_win.cc index 9135b8071..2a0a97c0d 100644 --- 
a/source/row_win.cc
+++ b/source/row_win.cc
@@ -3347,8 +3347,78 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
 }
 #endif  // HAS_ARGBSHADE_SSE2
 
-#endif  // _M_IX86
+#ifdef HAS_ARGBAFFINEROW_SSE2
+// Copy width ARGB pixels from src_argb along a slope (uv, dudv) to dst_argb.
+__declspec(naked) __declspec(align(16))
+void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
+                        uint8* dst_argb, const float* uv_dudv, int width) {
+  __asm {
+    push       esi              // saved here, restored before ret
+    mov        eax, [esp + 8]   // src_argb
+    mov        esi, [esp + 12]  // src_argb_stride
+    mov        edx, [esp + 16]  // dst_argb
+    mov        ecx, [esp + 20]  // pointer to uv_dudv
+    movq       xmm2, qword ptr [ecx]  // uv = uv_dudv[0..1], start x, y
+    movq       xmm3, qword ptr [ecx + 8]  // dudv = uv_dudv[2..3], step dx, dy
+    mov        ecx, [esp + 24]  // width
+    shl        esi, 16          // low 16 bits of stride moved to high word
+    add        esi, 4           // low word = 4, bytes per ARGB pixel
+    movd       xmm4, esi        // word pair (4, stride) for pmaddwd
+    sub        ecx, 2
+    jl         l2b              // width < 2: skip the 2 pixel loop
+    movdqa     xmm0, xmm2
+    addps      xmm0, xmm3       // x0 + dx, y0 + dy
+    movlhps    xmm2, xmm0       // x0, y0, x1, y1
+    pshufd     xmm4, xmm4, 0    // dup 4, stride
+    movlhps    xmm3, xmm3       // dx, dy, dx, dy
+    addps      xmm3, xmm3       // dudv *= 2, step for a pair of pixels
+    pshufd     xmm4, xmm4, 0    // repeats the dup above; no further effect
+
+    // 2 pixel loop
+    align      4
+  l2:
+    cvttps2dq  xmm1, xmm2       // x, y float to int, truncating
+    packssdw   xmm1, xmm1       // x, y as shorts, saturated to 16 bits
+    pmaddwd    xmm1, xmm4       // offset = x * 4 + y * stride, per pixel
+    addps      xmm2, xmm3       // x, y += dx, dy
+    movd       esi, xmm1        // byte offset of pixel 0
+    movdqa     xmm5, xmm1
+    pshufd     xmm5, xmm5, 0x55  // byte offset of pixel 1 to low dword
+    movd       xmm0, [eax + esi]  // read pixel 0
+    movd       esi, xmm5
+    movd       xmm5, [eax + esi]  // read pixel 1
+    punpckldq  xmm0, xmm5       // pixel 0, pixel 1
+    sub        ecx, 2           // sets the flags tested by jge below
+    movq       qword ptr [edx], xmm0  // write 2 pixels
+    lea        edx, [edx + 8]   // dst_argb += 8 bytes
+    jge        l2
+
+  l2b:
+    add        ecx, 2 - 1       // ecx = pixels remaining - 1
+    jl         l1b              // nothing remaining
+
+    // 1 pixel loop (at most 1 pixel remains when this is reached)
+    align      4
+  l1:
+    cvttps2dq  xmm1, xmm2       // x, y float to int, truncating
+    packssdw   xmm1, xmm1       // x, y as shorts, saturated to 16 bits
+    pmaddwd    xmm1, xmm4       // offset = x * 4 + y * stride
+    addps      xmm2, xmm3       // x, y += dx, dy
+    movd       esi, xmm1        // byte offset of the pixel
+    movd       xmm0, [eax + esi]  // copy a pixel
+    sub        ecx, 1           // sets the flags tested by jge below
+    movd       [edx], xmm0      // write 1 pixel
+    lea        edx, [edx + 4]   // dst_argb += 4 bytes
+    jge        l1
+  l1b:
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBAFFINEROW_SSE2
+
+#endif  // _M_IX86
 
 #ifdef __cplusplus
 }  // extern "C"