mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBAffineRow_SSE2 function to copy pixels from a source with slope to a row of destination.
BUG=60 TEST=none Review URL: https://webrtc-codereview.appspot.com/727004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@313 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
4f10e97ff2
commit
864f828a01
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 312
|
Version: 313
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 312
|
#define LIBYUV_VERSION 313
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -87,6 +87,7 @@ extern "C" {
|
|||||||
#define HAS_ARGBCOLORTABLEROW_X86
|
#define HAS_ARGBCOLORTABLEROW_X86
|
||||||
#define HAS_NV12TOARGBROW_SSSE3
|
#define HAS_NV12TOARGBROW_SSSE3
|
||||||
#define HAS_NV21TOARGBROW_SSSE3
|
#define HAS_NV21TOARGBROW_SSSE3
|
||||||
|
#define HAS_ARGBAFFINEROW_SSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are disabled when SSSE3 is available:
|
// The following are disabled when SSSE3 is available:
|
||||||
@ -522,6 +523,8 @@ void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
|
|||||||
uint32 value);
|
uint32 value);
|
||||||
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
|
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
|
||||||
uint32 value);
|
uint32 value);
|
||||||
|
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||||
|
uint8* dst_argb, const float* uv_dudv, int width);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
|||||||
@ -3347,8 +3347,78 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBSHADE_SSE2
|
#endif // HAS_ARGBSHADE_SSE2
|
||||||
|
|
||||||
#endif // _M_IX86
|
#ifdef HAS_ARGBAFFINEROW_SSE2
|
||||||
|
// Copy ARGB pixels from source image with slope to a row of destination.
|
||||||
|
__declspec(naked) __declspec(align(16))
|
||||||
|
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||||
|
uint8* dst_argb, const float* uv_dudv, int width) {
|
||||||
|
__asm {
|
||||||
|
push esi
|
||||||
|
mov eax, [esp + 8] // src_argb
|
||||||
|
mov esi, [esp + 12] // stride
|
||||||
|
mov edx, [esp + 16] // dst_argb
|
||||||
|
mov ecx, [esp + 20] // pointer to uv_dudv
|
||||||
|
movq xmm2, qword ptr [ecx] // uv
|
||||||
|
movq xmm3, qword ptr [ecx + 8] // dudv
|
||||||
|
mov ecx, [esp + 24] // width
|
||||||
|
shl esi, 16 // 4, stride
|
||||||
|
add esi, 4
|
||||||
|
movd xmm4, esi
|
||||||
|
sub ecx, 2
|
||||||
|
jl l2b
|
||||||
|
|
||||||
|
movdqa xmm0, xmm2 // x0, y0, x1, y1
|
||||||
|
addps xmm0, xmm3
|
||||||
|
movlhps xmm2, xmm0
|
||||||
|
pshufd xmm4, xmm4, 0 // dup 4, stride
|
||||||
|
movlhps xmm3, xmm3 // dudv
|
||||||
|
addps xmm3, xmm3 // dudv *= 2
|
||||||
|
pshufd xmm4, xmm4, 0
|
||||||
|
|
||||||
|
// 2 pixel loop
|
||||||
|
align 4
|
||||||
|
l2:
|
||||||
|
cvttps2dq xmm1, xmm2 // x, y float to int
|
||||||
|
packssdw xmm1, xmm1 // x, y as shorts
|
||||||
|
pmaddwd xmm1, xmm4 // offset = x * 4 + y * stride
|
||||||
|
addps xmm2, xmm3 // x, y += dx, dy
|
||||||
|
movd esi, xmm1
|
||||||
|
movdqa xmm5, xmm1
|
||||||
|
pshufd xmm5, xmm5, 0x55
|
||||||
|
movd xmm0, [eax + esi] // read pixel 0
|
||||||
|
movd esi, xmm5
|
||||||
|
movd xmm5, [eax + esi] // read pixel 1
|
||||||
|
punpckldq xmm0, xmm5
|
||||||
|
sub ecx, 2
|
||||||
|
movq qword ptr [edx], xmm0
|
||||||
|
lea edx, [edx + 8]
|
||||||
|
jge l2
|
||||||
|
|
||||||
|
l2b:
|
||||||
|
add ecx, 2 - 1
|
||||||
|
jl l1b
|
||||||
|
|
||||||
|
// 1 pixel loop
|
||||||
|
align 4
|
||||||
|
l1:
|
||||||
|
cvttps2dq xmm1, xmm2 // x, y float to int
|
||||||
|
packssdw xmm1, xmm1 // x, y as shorts
|
||||||
|
pmaddwd xmm1, xmm4 // offset = x * 4 + y * stride
|
||||||
|
addps xmm2, xmm3 // x, y += dx, dy
|
||||||
|
movd esi, xmm1
|
||||||
|
movd xmm0, [eax + esi] // copy a pixel
|
||||||
|
sub ecx, 1
|
||||||
|
movd [edx], xmm0
|
||||||
|
lea edx, [edx + 4]
|
||||||
|
jge l1
|
||||||
|
l1b:
|
||||||
|
pop esi
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBAFFINEROW_SSE2
|
||||||
|
|
||||||
|
#endif // _M_IX86
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user