mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBToI444_SSSE3 UV function ported. Thanks to changjun.yang@intel.com
BUG=148 TESTED=out\release\libyuv_unittest --gtest_filter=*ARGBToI* | grep ms Review URL: https://webrtc-codereview.appspot.com/1019011 git-svn-id: http://libyuv.googlecode.com/svn/trunk@539 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
ba45bbff34
commit
41e972ec31
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 538
|
Version: 539
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -119,6 +119,7 @@ extern "C" {
|
|||||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||||
#define HAS_ARGBCOLORTABLEROW_X86
|
#define HAS_ARGBCOLORTABLEROW_X86
|
||||||
#define HAS_ARGBMULTIPLYROW_SSE2
|
#define HAS_ARGBMULTIPLYROW_SSE2
|
||||||
|
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are Yasm x86 only.
|
// The following are Yasm x86 only.
|
||||||
@ -492,6 +493,13 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
|
|||||||
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
|
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
|
||||||
|
void ARGBToUV444Row_SSSE3(const uint8* src_argb,
|
||||||
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
|
||||||
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
|
||||||
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
|
||||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
|
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
|
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 538
|
#define LIBYUV_VERSION 539
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -287,6 +287,23 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
|
|||||||
#endif
|
#endif
|
||||||
#undef UVANY
|
#undef UVANY
|
||||||
|
|
||||||
|
// UV444ANY builds NAMEANY, an any-width wrapper around a 4:4:4 U/V row
// converter (one U and one V byte written per source pixel).
//   NAMEANY       name of the generated function.
//   ANYTOUV_SIMD  row function used for the leading (width & ~MASK) pixels.
//   ANYTOUV_C     row function used for the trailing (width & MASK) pixels.
//   BPP           bytes per source pixel; used to advance src_uv past the
//                 pixels the SIMD function consumed.
//   MASK          SIMD pixels-per-iteration minus one (15 for 16-pixel steps).
// The SIMD function is only called when at least one full group is present:
// the SSSE3 row loops run their body once before testing the remaining
// count, so calling them with a count of 0 would still read a full group of
// source pixels and write a full group of U and V bytes.
#define UV444ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                  \
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      int n = width & ~(MASK);                                                 \
      if (n > 0) {                                                             \
        ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                 \
      }                                                                        \
      ANYTOUV_C(src_uv + n * (BPP),                                            \
                dst_u + n,                                                     \
                dst_v + n,                                                     \
                width & (MASK));                                               \
    }

#ifdef HAS_ARGBTOUV444ROW_SSSE3
// ARGB is 4 bytes per pixel; the SSSE3 row function steps 16 pixels at a time.
UV444ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
         ARGBToUV444Row_C, 4, 15)
#endif
#undef UV444ANY
|
||||||
|
|
||||||
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
|
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
|
||||||
void NAMEANY(const uint8* src_uv, \
|
void NAMEANY(const uint8* src_uv, \
|
||||||
uint8* dst_u, uint8* dst_v, int width) { \
|
uint8* dst_u, uint8* dst_v, int width) { \
|
||||||
|
|||||||
@ -1101,6 +1101,122 @@ __asm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts a row of ARGB pixels into separate U and V rows at full (4:4:4)
// resolution: one U byte and one V byte per source pixel, 16 pixels per
// loop iteration.
// Aligned variant: every load and store is a movdqa, so src_argb0, dst_u and
// dst_v must each be 16-byte aligned.
// The loop body runs before the remaining count is tested, so one full group
// of 16 pixels is always processed; the count is consumed in steps of 16.
__declspec(naked) __declspec(align(16))
void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
                          uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       edi                      // saved here, restored before ret
    movdqa     xmm7, kARGBToU           // multiplier bytes for the U pass
    movdqa     xmm6, kARGBToV           // multiplier bytes for the V pass
    movdqa     xmm5, kAddUV128          // added to every packed output byte
    mov        eax, [esp + 4 + 4]       // eax = src_argb0
    mov        edx, [esp + 4 + 8]       // edx = dst_u
    mov        edi, [esp + 4 + 12]      // edi = dst_v
    mov        ecx, [esp + 4 + 16]      // ecx = width (pixels remaining)
    sub        edi, edx                 // edi = dst_v - dst_u, so that
                                        // [edx + edi] addresses the V row

    align      16
  uv444loop:
    // U pass: 64 source bytes (16 pixels) -> 16 output bytes at [edx].
    movdqa     xmm0, [eax]
    pmaddubsw  xmm0, xmm7
    movdqa     xmm1, [eax + 16]
    pmaddubsw  xmm1, xmm7
    movdqa     xmm2, [eax + 32]
    pmaddubsw  xmm2, xmm7
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm3, xmm7
    phaddw     xmm0, xmm1               // one 16-bit sum per pixel, pixels 0-7
    phaddw     xmm2, xmm3               // one 16-bit sum per pixel, pixels 8-15
    psrlw      xmm0, 8                  // keep the high byte of each sum
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2               // 16 words -> 16 bytes
    paddb      xmm0, xmm5
    movdqa     [edx], xmm0

    // V pass: same 64 source bytes, V multipliers, stored at [edx + edi].
    movdqa     xmm0, [eax]
    pmaddubsw  xmm0, xmm6
    movdqa     xmm1, [eax + 16]
    pmaddubsw  xmm1, xmm6
    movdqa     xmm2, [eax + 32]
    pmaddubsw  xmm2, xmm6
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    movdqa     [edx + edi], xmm0

    lea        eax, [eax + 64]          // next 16 source pixels
    lea        edx, [edx + 16]          // next 16 U bytes (V follows via edi)
    sub        ecx, 16                  // 16 pixels done; sets flags for jg
    jg         uv444loop

    pop        edi
    ret
  }
}
|
||||||
|
|
||||||
|
// Converts a row of ARGB pixels into separate U and V rows at full (4:4:4)
// resolution: one U byte and one V byte per source pixel, 16 pixels per
// loop iteration.
// Unaligned variant: source loads and destination stores use movdqu, so
// src_argb0, dst_u and dst_v may have any alignment.
// The loop body runs before the remaining count is tested, so one full group
// of 16 pixels is always processed; the count is consumed in steps of 16.
__declspec(naked) __declspec(align(16))
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
                                    uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       edi                      // saved here, restored before ret
    movdqa     xmm7, kARGBToU           // multiplier bytes for the U pass
    movdqa     xmm6, kARGBToV           // multiplier bytes for the V pass
    movdqa     xmm5, kAddUV128          // added to every packed output byte
    mov        eax, [esp + 4 + 4]       // eax = src_argb0
    mov        edx, [esp + 4 + 8]       // edx = dst_u
    mov        edi, [esp + 4 + 12]      // edi = dst_v
    mov        ecx, [esp + 4 + 16]      // ecx = width (pixels remaining)
    sub        edi, edx                 // edi = dst_v - dst_u, so that
                                        // [edx + edi] addresses the V row

    align      16
  uv444loop:
    // U pass: 64 source bytes (16 pixels) -> 16 output bytes at [edx].
    movdqu     xmm0, [eax]
    pmaddubsw  xmm0, xmm7
    movdqu     xmm1, [eax + 16]
    pmaddubsw  xmm1, xmm7
    movdqu     xmm2, [eax + 32]
    pmaddubsw  xmm2, xmm7
    movdqu     xmm3, [eax + 48]
    pmaddubsw  xmm3, xmm7
    phaddw     xmm0, xmm1               // one 16-bit sum per pixel, pixels 0-7
    phaddw     xmm2, xmm3               // one 16-bit sum per pixel, pixels 8-15
    psrlw      xmm0, 8                  // keep the high byte of each sum
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2               // 16 words -> 16 bytes
    paddb      xmm0, xmm5
    movdqu     [edx], xmm0

    // V pass: same 64 source bytes, V multipliers, stored at [edx + edi].
    movdqu     xmm0, [eax]
    pmaddubsw  xmm0, xmm6
    movdqu     xmm1, [eax + 16]
    pmaddubsw  xmm1, xmm6
    movdqu     xmm2, [eax + 32]
    pmaddubsw  xmm2, xmm6
    movdqu     xmm3, [eax + 48]
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    movdqu     [edx + edi], xmm0

    lea        eax, [eax + 64]          // next 16 source pixels
    lea        edx, [edx + 16]          // next 16 U bytes (V follows via edi)
    sub        ecx, 16                  // 16 pixels done; sets flags for jg
    jg         uv444loop

    pop        edi
    ret
  }
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
|
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
|
||||||
uint8* dst_u, uint8* dst_v, int width) {
|
uint8* dst_u, uint8* dst_v, int width) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user