mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
ARGBToI444_SSSE3 UV function ported. Thanks to changjun.yang@intel.com
BUG=148
TESTED=out\release\libyuv_unittest --gtest_filter=*ARGBToI* | grep ms
Review URL: https://webrtc-codereview.appspot.com/1019011
git-svn-id: http://libyuv.googlecode.com/svn/trunk@539 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
ba45bbff34
commit
41e972ec31
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 538
|
||||
Version: 539
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -119,6 +119,7 @@ extern "C" {
|
||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_ARGBMULTIPLYROW_SSE2
|
||||
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are Yasm x86 only.
|
||||
@ -492,6 +493,13 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
void ARGBToUV444Row_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 538
|
||||
#define LIBYUV_VERSION 539
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -40,6 +40,17 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow_C;
|
||||
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix) = ARGBToUV444Row_C;
|
||||
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
@ -51,6 +62,7 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(HAS_ARGBTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
|
||||
@ -287,6 +287,23 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
|
||||
#endif
|
||||
#undef UVANY
|
||||
|
||||
// UV444ANY generates NAMEANY(src_uv, dst_u, dst_v, width): a row function for
// arbitrary widths built from a SIMD kernel and a C fallback.
//   ANYTOUV_SIMD  kernel that handles the leading (width & ~MASK) pixels.
//   ANYTOUV_C     C row function that handles the trailing (width & MASK)
//                 pixels.
//   BPP           source bytes per pixel, used to advance src_uv past the
//                 pixels already converted by the SIMD kernel.
//   MASK          SIMD step minus one (15 for a 16-pixel kernel).
// The SIMD kernel is skipped when there is no full SIMD step to process: the
// SSSE3 kernels run their loop body before testing the pixel count, so calling
// them with a count of 0 would still read and write one full step.
// MASK and BPP are parenthesised so that expression arguments expand safely.
#define UV444ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                  \
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      int n = width & ~(MASK);                                                 \
      if (n > 0) {                                                             \
        ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                 \
      }                                                                        \
      ANYTOUV_C(src_uv + n * (BPP),                                            \
                dst_u + n,                                                     \
                dst_v + n,                                                     \
                width & (MASK));                                               \
    }

#ifdef HAS_ARGBTOUV444ROW_SSSE3
// 4 bytes per ARGB pixel, 16 pixels per SSSE3 step.
UV444ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
         ARGBToUV444Row_C, 4, 15)
#endif
#undef UV444ANY
|
||||
|
||||
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
|
||||
void NAMEANY(const uint8* src_uv, \
|
||||
uint8* dst_u, uint8* dst_v, int width) { \
|
||||
|
||||
@ -1101,6 +1101,122 @@ __asm {
|
||||
}
|
||||
}
|
||||
|
||||
// Converts a row of ARGB pixels to full-resolution (4:4:4) U and V rows,
// 16 pixels per loop iteration, using SSSE3 (MSVC x86 inline assembly).
//
//   src_argb0  source pixels, 4 bytes each. Read with movdqa, so it must be
//              16-byte aligned.
//   dst_u      destination U row, one byte per pixel.
//   dst_v      destination V row, one byte per pixel.
//   width      number of pixels. The loop body runs before the count is
//              tested, so at least 16 pixels are always processed and the
//              count is effectively rounded up to a multiple of 16.
//
// The destination rows are written with movdqu rather than movdqa: callers may
// select this variant after checking only the source pointer and stride for
// alignment, and an aligned store to an unaligned dst_u/dst_v would fault.
//
// Registers: eax = source pointer, edx = dst_u pointer, edi = dst_v - dst_u,
// ecx = pixels remaining, xmm7/xmm6 = kARGBToU/kARGBToV weights,
// xmm5 = kAddUV128 bias. edi is saved and restored; eax, ecx, edx and
// xmm0-xmm3, xmm5-xmm7 are overwritten.
__declspec(naked) __declspec(align(16))
void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
                          uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       edi
    // Arguments sit above the saved edi (4 bytes) and the return address
    // (4 bytes).
    mov        eax, [esp + 4 + 4]   // src_argb0
    mov        edx, [esp + 4 + 8]   // dst_u
    mov        edi, [esp + 4 + 12]  // dst_v
    mov        ecx, [esp + 4 + 16]  // width
    movdqa     xmm7, kARGBToU
    movdqa     xmm6, kARGBToV
    movdqa     xmm5, kAddUV128
    sub        edi, edx             // edi = dst_v - dst_u, so [edx + edi] is V

    align      16
 convertloop:
    /* convert to U and V */
    // U: weight each channel, then sum pairs twice so that each pixel
    // collapses to one 16-bit value.
    movdqa     xmm0, [eax]          // pixels 0-3
    movdqa     xmm1, [eax + 16]     // pixels 4-7
    movdqa     xmm2, [eax + 32]     // pixels 8-11
    movdqa     xmm3, [eax + 48]     // pixels 12-15
    pmaddubsw  xmm0, xmm7
    pmaddubsw  xmm1, xmm7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    phaddw     xmm0, xmm1           // 8 sums for pixels 0-7
    phaddw     xmm2, xmm3           // 8 sums for pixels 8-15
    psrlw      xmm0, 8              // keep the high byte of each sum
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2           // 16 bytes, one per pixel
    paddb      xmm0, xmm5           // add the kAddUV128 bias (wraps mod 256)
    // Sets the flags consumed by jg below; no instruction in between
    // modifies EFLAGS.
    sub        ecx, 16
    movdqu     [edx], xmm0          // dst_u alignment is not guaranteed

    // V: the same 64 source bytes are reloaded because xmm0-xmm3 were
    // overwritten by the U computation.
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm6
    pmaddubsw  xmm1, xmm6
    pmaddubsw  xmm2, xmm6
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    lea        eax, [eax + 64]      // lea (not add) keeps the flags from sub
    movdqu     [edx + edi], xmm0    // dst_v alignment is not guaranteed
    lea        edx, [edx + 16]
    jg         convertloop          // loop while pixels remain (ecx > 0)

    pop        edi
    ret
  }
}
|
||||
|
||||
// ARGB -> U and V rows at full resolution (4:4:4), 16 pixels per iteration,
// with no alignment requirement: every load and store uses movdqu.
//
// The pixel count is tested at the bottom of the loop, so one 16-pixel step
// is always executed. edi is preserved; eax, ecx, edx and xmm0-xmm3,
// xmm5-xmm7 are overwritten.
__declspec(naked) __declspec(align(16))
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
                                    uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       edi
    mov        ecx, [esp + 4 + 16]  // pixels left to convert
    mov        edi, [esp + 4 + 12]  // V row
    mov        edx, [esp + 4 + 8]   // U row
    mov        eax, [esp + 4 + 4]   // ARGB source
    sub        edi, edx             // V row expressed as an offset from U
    movdqa     xmm5, kAddUV128      // bias added to every output byte
    movdqa     xmm6, kARGBToV       // V weights
    movdqa     xmm7, kARGBToU       // U weights

    align      16
 uv444loop:
    // ---- U for 16 pixels ----
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + 32]
    movdqu     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm7
    pmaddubsw  xmm1, xmm7
    phaddw     xmm0, xmm1           // one word per pixel, pixels 0-7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    phaddw     xmm2, xmm3           // one word per pixel, pixels 8-15
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    movdqu     [edx], xmm0

    // ---- V for the same 16 pixels (source is read again) ----
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + 32]
    movdqu     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm6
    pmaddubsw  xmm1, xmm6
    phaddw     xmm0, xmm1
    pmaddubsw  xmm2, xmm6
    pmaddubsw  xmm3, xmm6
    phaddw     xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    movdqu     [edx + edi], xmm0

    // Advance both pointers, then count down; sub is last so that its flags
    // are the ones jg sees.
    add        eax, 64
    add        edx, 16
    sub        ecx, 16
    jg         uv444loop

    pop        edi
    ret
  }
}
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user