mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
RGBAToI420_SSSE3 and I420ToRGBA_SSSE3 implemented.
BUG=78 TESTED=gcl lint Review URL: https://webrtc-codereview.appspot.com/796009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@359 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
13f3894033
commit
25dc05858e
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 358
|
Version: 359
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 358
|
#define LIBYUV_VERSION 359
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -37,9 +37,6 @@ extern "C" {
|
|||||||
#define HAS_ABGRTOARGBROW_SSSE3
|
#define HAS_ABGRTOARGBROW_SSSE3
|
||||||
#define HAS_ABGRTOUVROW_SSSE3
|
#define HAS_ABGRTOUVROW_SSSE3
|
||||||
#define HAS_ABGRTOYROW_SSSE3
|
#define HAS_ABGRTOYROW_SSSE3
|
||||||
// TODO(FBARCHARD): #define HAS_RGBATOARGBROW_SSSE3
|
|
||||||
// TODO(FBARCHARD): #define HAS_RGBATOUVROW_SSSE3
|
|
||||||
// TODO(FBARCHARD): #define HAS_RGBATOYROW_SSSE3
|
|
||||||
#define HAS_ARGBTORGBAROW_SSSE3
|
#define HAS_ARGBTORGBAROW_SSSE3
|
||||||
#define HAS_ARGB1555TOARGBROW_SSE2
|
#define HAS_ARGB1555TOARGBROW_SSE2
|
||||||
#define HAS_ARGB4444TOARGBROW_SSE2
|
#define HAS_ARGB4444TOARGBROW_SSE2
|
||||||
@ -61,7 +58,6 @@ extern "C" {
|
|||||||
#define HAS_I422TOARGBROW_SSSE3
|
#define HAS_I422TOARGBROW_SSSE3
|
||||||
#define HAS_I422TOBGRAROW_SSSE3
|
#define HAS_I422TOBGRAROW_SSSE3
|
||||||
#define HAS_I422TOABGRROW_SSSE3
|
#define HAS_I422TOABGRROW_SSSE3
|
||||||
// TODO(FBARCHARD): #define HAS_I422TORGBAROW_SSSE3
|
|
||||||
#define HAS_I444TOARGBROW_SSSE3
|
#define HAS_I444TOARGBROW_SSSE3
|
||||||
#define HAS_I411TOARGBROW_SSSE3
|
#define HAS_I411TOARGBROW_SSSE3
|
||||||
#define HAS_I400TOARGBROW_SSE2
|
#define HAS_I400TOARGBROW_SSE2
|
||||||
@ -91,10 +87,13 @@ extern "C" {
|
|||||||
|
|
||||||
// The following are Windows only:
|
// The following are Windows only:
|
||||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||||
// TODO(fbarchard): Investigate possible issue in this function and reenable.
|
|
||||||
#define HAS_ARGBCOLORTABLEROW_X86
|
#define HAS_ARGBCOLORTABLEROW_X86
|
||||||
#define HAS_NV12TOARGBROW_SSSE3
|
#define HAS_NV12TOARGBROW_SSSE3
|
||||||
#define HAS_NV21TOARGBROW_SSSE3
|
#define HAS_NV21TOARGBROW_SSSE3
|
||||||
|
#define HAS_I422TORGBAROW_SSSE3
|
||||||
|
#define HAS_RGBATOARGBROW_SSSE3
|
||||||
|
#define HAS_RGBATOUVROW_SSSE3
|
||||||
|
#define HAS_RGBATOYROW_SSSE3
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are disabled when SSSE3 is available:
|
// The following are disabled when SSSE3 is available:
|
||||||
|
|||||||
@ -941,7 +941,7 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||||
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0)
|
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0)
|
||||||
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1)
|
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1)
|
||||||
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2)
|
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2)
|
||||||
@ -949,13 +949,17 @@ Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, 0)
|
|||||||
Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, 0)
|
Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, 0)
|
||||||
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
|
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
|
||||||
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
|
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
|
||||||
// TODO(fbarchard): NOLINT YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
|
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_I422TOARGBROW_NEON)
|
// Expands YANY (defined earlier in this file, not shown here) to create
// I422ToRGBARow_Any_SSSE3 from the unaligned SSSE3 row function and the C
// row function.  Only compiled when the SSSE3 RGBA writer is enabled.
#ifdef HAS_I422TORGBAROW_SSSE3
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
#endif  // HAS_I422TORGBAROW_SSSE3
|
||||||
|
#ifdef HAS_I422TOARGBROW_NEON
|
||||||
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1)
|
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1)
|
||||||
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1)
|
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1)
|
||||||
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1)
|
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1)
|
||||||
// TODO(fbarchard): NOLINT YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1)
|
#endif
|
||||||
|
#ifdef HAS_I422TORGBAROW_NEON
|
||||||
|
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1)
|
||||||
#endif
|
#endif
|
||||||
#undef YANY
|
#undef YANY
|
||||||
|
|
||||||
@ -987,7 +991,9 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 2)
|
|||||||
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4)
|
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4)
|
||||||
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4)
|
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4)
|
||||||
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4)
|
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4)
|
||||||
// TODO(fbarchard): YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4)
|
// Expands YANY (the 3-argument form in scope at this point of the file, not
// shown here) to create RGBAToYRow_Any_SSSE3 on top of the unaligned SSSE3
// RGBA -> Y row function.  Only compiled when that row function is enabled.
#ifdef HAS_RGBATOYROW_SSSE3
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4)
#endif  // HAS_RGBATOYROW_SSSE3
|
||||||
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
|
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
|
||||||
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
|
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
|
||||||
#undef YANY
|
#undef YANY
|
||||||
@ -1006,7 +1012,9 @@ YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
|
|||||||
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
|
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
|
||||||
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
|
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
|
||||||
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
|
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
|
||||||
// TODO(fbarchard): NOLINT UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4)
|
// Expands UVANY (defined earlier in this file, not shown here) to create
// RGBAToUVRow_Any_SSSE3 from the unaligned SSSE3 row function and the C row
// function.
// The wrapper references RGBAToUVRow_Unaligned_SSSE3, so it is guarded by the
// UV feature macro (HAS_RGBATOUVROW_SSSE3) rather than the Y one; otherwise a
// configuration that enables only the Y row would reference an undefined
// symbol.
#ifdef HAS_RGBATOUVROW_SSSE3
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4)
#endif  // HAS_RGBATOUVROW_SSSE3
|
||||||
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
|
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
|
||||||
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
||||||
#undef UVANY
|
#undef UVANY
|
||||||
|
|||||||
@ -59,6 +59,19 @@ static const vec8 kABGRToV = {
|
|||||||
112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
|
112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Constants for RGBA.
|
||||||
|
static const vec8 kRGBAToY = {
|
||||||
|
0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
|
||||||
|
};
|
||||||
|
|
||||||
|
static const vec8 kRGBAToU = {
|
||||||
|
0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
|
||||||
|
};
|
||||||
|
|
||||||
|
static const vec8 kRGBAToV = {
|
||||||
|
0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
|
||||||
|
};
|
||||||
|
|
||||||
static const uvec8 kAddY16 = {
|
static const uvec8 kAddY16 = {
|
||||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
|
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
|
||||||
};
|
};
|
||||||
@ -78,16 +91,16 @@ static const uvec8 kShuffleMaskRAWToARGB = {
|
|||||||
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
|
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
|
||||||
};
|
};
|
||||||
|
|
||||||
// Shuffle table for converting ABGR to ARGB.
|
|
||||||
static const uvec8 kShuffleMaskABGRToARGB = {
|
|
||||||
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
|
|
||||||
};
|
|
||||||
|
|
||||||
// Shuffle table for converting BGRA to ARGB.
|
// Shuffle table for converting BGRA to ARGB.
|
||||||
static const uvec8 kShuffleMaskBGRAToARGB = {
|
static const uvec8 kShuffleMaskBGRAToARGB = {
|
||||||
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Shuffle table for converting ABGR to ARGB.
|
||||||
|
static const uvec8 kShuffleMaskABGRToARGB = {
|
||||||
|
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
|
||||||
|
};
|
||||||
|
|
||||||
// Shuffle table for converting RGBA to ARGB.
|
// Shuffle table for converting RGBA to ARGB.
|
||||||
static const uvec8 kShuffleMaskRGBAToARGB = {
|
static const uvec8 kShuffleMaskRGBAToARGB = {
|
||||||
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
|
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
|
||||||
@ -137,12 +150,12 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
|
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src_abgr
|
mov eax, [esp + 4] // src_bgra
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // pix
|
mov ecx, [esp + 12] // pix
|
||||||
movdqa xmm5, kShuffleMaskABGRToARGB
|
movdqa xmm5, kShuffleMaskBGRAToARGB
|
||||||
sub edx, eax
|
sub edx, eax
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
@ -158,12 +171,12 @@ __asm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
|
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src_bgra
|
mov eax, [esp + 4] // src_abgr
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // pix
|
mov ecx, [esp + 12] // pix
|
||||||
movdqa xmm5, kShuffleMaskBGRAToARGB
|
movdqa xmm5, kShuffleMaskABGRToARGB
|
||||||
sub edx, eax
|
sub edx, eax
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
@ -844,6 +857,74 @@ __asm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts one row of 4-byte RGBA pixels to 8-bit Y (luma) with SSSE3.
// Naked function: arguments are read directly from the stack.
//   src_argb  source pixels; loaded with movdqa, so must be 16-byte aligned.
//   dst_y     destination; stored with movdqa, so must be 16-byte aligned.
//   pix       pixel count.  Every pass converts 16 pixels (64 bytes in,
//             16 bytes out) and the loop repeats while the remaining count
//             is > 0, so at least one pass always runs and the count is
//             effectively rounded up to a multiple of 16.
// Per pixel: Y = (sum(byte[i] * kRGBAToY[i]) >> 7) + 16.
__declspec(naked) __declspec(align(16))
void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
  __asm {
    mov        ecx, [esp + 12]   // pix
    mov        edx, [esp + 8]    // dst_y
    mov        eax, [esp + 4]    // src_argb
    movdqa     xmm4, kRGBAToY    // per-byte luma weights
    movdqa     xmm5, kAddY16     // +16 offset added to every Y

    align      16
 rgbatoyloop:
    // Load 16 pixels.
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    // Multiply bytes by weights and add adjacent pairs -> 2 words per pixel.
    pmaddubsw  xmm0, xmm4
    pmaddubsw  xmm1, xmm4
    pmaddubsw  xmm2, xmm4
    pmaddubsw  xmm3, xmm4
    lea        eax, [eax + 64]
    // Add the two partial sums of each pixel -> 1 word per pixel.
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 7
    psrlw      xmm2, 7
    packuswb   xmm0, xmm2        // 16 words -> 16 bytes
    paddb      xmm0, xmm5
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    jg         rgbatoyloop
    ret
  }
}
|
||||||
|
|
||||||
|
// Same conversion as RGBAToYRow_SSSE3, but source loads and destination
// stores use movdqu, so neither pointer needs 16-byte alignment.
// Naked function: arguments are read directly from the stack.
//   src_argb  source pixels, 4 bytes each.
//   dst_y     destination, one byte per pixel.
//   pix       pixel count.  Every pass converts 16 pixels and the loop
//             repeats while the remaining count is > 0, so at least one pass
//             always runs and the count is effectively rounded up to a
//             multiple of 16.
// Per pixel: Y = (sum(byte[i] * kRGBAToY[i]) >> 7) + 16.
__declspec(naked) __declspec(align(16))
void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
  __asm {
    mov        ecx, [esp + 12]   // pix
    mov        edx, [esp + 8]    // dst_y
    mov        eax, [esp + 4]    // src_argb
    movdqa     xmm4, kRGBAToY    // per-byte luma weights
    movdqa     xmm5, kAddY16     // +16 offset added to every Y

    align      16
 rgbatoyloop:
    // Load 16 pixels (no alignment requirement).
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + 32]
    movdqu     xmm3, [eax + 48]
    // Multiply bytes by weights and add adjacent pairs -> 2 words per pixel.
    pmaddubsw  xmm0, xmm4
    pmaddubsw  xmm1, xmm4
    pmaddubsw  xmm2, xmm4
    pmaddubsw  xmm3, xmm4
    lea        eax, [eax + 64]
    // Add the two partial sums of each pixel -> 1 word per pixel.
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 7
    psrlw      xmm2, 7
    packuswb   xmm0, xmm2        // 16 words -> 16 bytes
    paddb      xmm0, xmm5
    sub        ecx, 16           // flags survive the store and lea below
    movdqu     [edx], xmm0
    lea        edx, [edx + 16]
    jg         rgbatoyloop
    ret
  }
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width) {
|
uint8* dst_u, uint8* dst_v, int width) {
|
||||||
@ -1251,6 +1332,142 @@ __asm {
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts two adjacent rows of 4-byte RGBA pixels into one row of 2x2
// subsampled U and V bytes with SSSE3.
// Naked function: arguments are read from the stack; esi and edi are saved
// and restored around the body.
//   src_argb0        first source row; loaded with movdqa (16-byte aligned).
//   src_stride_argb  byte offset from the first row to the second row; the
//                    second row is a pavgb memory operand, so it must be
//                    16-byte aligned as well.
//   dst_u, dst_v     destinations; written 8 bytes at a time with
//                    movlps/movhps.
//   width            source pixel count.  Every pass consumes 16 pixels from
//                    each row and emits 8 U and 8 V bytes; the loop repeats
//                    while the remaining count is > 0, so at least one pass
//                    always runs.
__declspec(naked) __declspec(align(16))
void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       esi
    push       edi
    mov        ecx, [esp + 8 + 20]  // pix
    mov        edi, [esp + 8 + 16]  // dst_v
    mov        edx, [esp + 8 + 12]  // dst_u
    mov        esi, [esp + 8 + 8]   // src_stride_argb
    mov        eax, [esp + 8 + 4]   // src_argb
    movdqa     xmm5, kAddUV128
    movdqa     xmm6, kRGBAToV
    movdqa     xmm7, kRGBAToU
    sub        edi, edx             // edi = dst_v - dst_u

    align      16
 rgbatouvloop:
    // Step 1: average the two rows, then average horizontal pixel pairs,
    // reducing 16x2 source pixels to 8 averaged pixels.
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pavgb      xmm0, [eax + esi]
    pavgb      xmm1, [eax + esi + 16]
    pavgb      xmm2, [eax + esi + 32]
    pavgb      xmm3, [eax + esi + 48]
    lea        eax, [eax + 64]
    movdqa     xmm4, xmm0
    shufps     xmm0, xmm1, 0x88     // even-numbered pixels
    shufps     xmm4, xmm1, 0xdd     // odd-numbered pixels
    pavgb      xmm0, xmm4
    movdqa     xmm4, xmm2
    shufps     xmm2, xmm3, 0x88
    shufps     xmm4, xmm3, 0xdd
    pavgb      xmm2, xmm4

    // Step 2: weight the 8 averaged pixels for U (xmm0) and V (xmm1).
    // Same multiply / horizontal-add pattern as the Y rows, but producing
    // 8 U and 8 V values instead of 16 Y values.
    movdqa     xmm1, xmm0
    movdqa     xmm3, xmm2
    pmaddubsw  xmm0, xmm7           // U
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm1, xmm6           // V
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm2
    phaddw     xmm1, xmm3
    psraw      xmm0, 8
    psraw      xmm1, 8
    packsswb   xmm0, xmm1           // low 8 bytes = U, high 8 bytes = V
    paddb      xmm0, xmm5           // signed -> unsigned

    // Step 3: write 8 U bytes and 8 V bytes.
    sub        ecx, 16              // flags survive the stores and lea below
    movlps     qword ptr [edx], xmm0        // U
    movhps     qword ptr [edx + edi], xmm0  // V
    lea        edx, [edx + 8]
    jg         rgbatouvloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||||
|
|
||||||
|
// Same conversion as RGBAToUVRow_SSSE3, but both source rows are read with
// movdqu, so neither row needs 16-byte alignment.
// Naked function: arguments are read from the stack; esi and edi are saved
// and restored around the body.
//   src_argb0        first source row.
//   src_stride_argb  byte offset from the first row to the second row.
//   dst_u, dst_v     destinations; written 8 bytes at a time with
//                    movlps/movhps.
//   width            source pixel count.  Every pass consumes 16 pixels from
//                    each row and emits 8 U and 8 V bytes; the loop repeats
//                    while the remaining count is > 0, so at least one pass
//                    always runs.
__declspec(naked) __declspec(align(16))
void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                 uint8* dst_u, uint8* dst_v, int width) {
  __asm {
    push       esi
    push       edi
    mov        ecx, [esp + 8 + 20]  // pix
    mov        edi, [esp + 8 + 16]  // dst_v
    mov        edx, [esp + 8 + 12]  // dst_u
    mov        esi, [esp + 8 + 8]   // src_stride_argb
    mov        eax, [esp + 8 + 4]   // src_argb
    movdqa     xmm5, kAddUV128
    movdqa     xmm6, kRGBAToV
    movdqa     xmm7, kRGBAToU
    sub        edi, edx             // edi = dst_v - dst_u

    align      16
 rgbatouvloop:
    // Step 1: average the two rows, then average horizontal pixel pairs,
    // reducing 16x2 source pixels to 8 averaged pixels.  The second row is
    // staged through xmm4 because pavgb with a memory operand would require
    // alignment.
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + 32]
    movdqu     xmm3, [eax + 48]
    movdqu     xmm4, [eax + esi]
    pavgb      xmm0, xmm4
    movdqu     xmm4, [eax + esi + 16]
    pavgb      xmm1, xmm4
    movdqu     xmm4, [eax + esi + 32]
    pavgb      xmm2, xmm4
    movdqu     xmm4, [eax + esi + 48]
    pavgb      xmm3, xmm4
    lea        eax, [eax + 64]
    movdqa     xmm4, xmm0
    shufps     xmm0, xmm1, 0x88     // even-numbered pixels
    shufps     xmm4, xmm1, 0xdd     // odd-numbered pixels
    pavgb      xmm0, xmm4
    movdqa     xmm4, xmm2
    shufps     xmm2, xmm3, 0x88
    shufps     xmm4, xmm3, 0xdd
    pavgb      xmm2, xmm4

    // Step 2: weight the 8 averaged pixels for U (xmm0) and V (xmm1).
    // Same multiply / horizontal-add pattern as the Y rows, but producing
    // 8 U and 8 V values instead of 16 Y values.
    movdqa     xmm1, xmm0
    movdqa     xmm3, xmm2
    pmaddubsw  xmm0, xmm7           // U
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm1, xmm6           // V
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm2
    phaddw     xmm1, xmm3
    psraw      xmm0, 8
    psraw      xmm1, 8
    packsswb   xmm0, xmm1           // low 8 bytes = U, high 8 bytes = V
    paddb      xmm0, xmm5           // signed -> unsigned

    // Step 3: write 8 U bytes and 8 V bytes.
    sub        ecx, 16              // flags survive the stores and lea below
    movlps     qword ptr [edx], xmm0        // U
    movhps     qword ptr [edx + edi], xmm0  // V
    lea        edx, [edx + 8]
    jg         rgbatouvloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||||
#endif // HAS_ARGBTOYROW_SSSE3
|
#endif // HAS_ARGBTOYROW_SSSE3
|
||||||
|
|
||||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||||
@ -1847,47 +2064,6 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
|
||||||
void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
|
||||||
const uint8* u_buf,
|
|
||||||
const uint8* v_buf,
|
|
||||||
uint8* abgr_buf,
|
|
||||||
int width) {
|
|
||||||
__asm {
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
mov eax, [esp + 8 + 4] // Y
|
|
||||||
mov esi, [esp + 8 + 8] // U
|
|
||||||
mov edi, [esp + 8 + 12] // V
|
|
||||||
mov edx, [esp + 8 + 16] // abgr
|
|
||||||
mov ecx, [esp + 8 + 20] // width
|
|
||||||
sub edi, esi
|
|
||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
|
||||||
pxor xmm4, xmm4
|
|
||||||
|
|
||||||
align 16
|
|
||||||
convertloop:
|
|
||||||
READYUV422
|
|
||||||
YUVTORGB
|
|
||||||
|
|
||||||
// Step 3: Weave into ARGB
|
|
||||||
punpcklbw xmm2, xmm1 // RG
|
|
||||||
punpcklbw xmm0, xmm5 // BA
|
|
||||||
movdqa xmm1, xmm2
|
|
||||||
punpcklwd xmm2, xmm0 // RGBA first 4 pixels
|
|
||||||
punpckhwd xmm1, xmm0 // RGBA next 4 pixels
|
|
||||||
movdqa [edx], xmm2
|
|
||||||
movdqa [edx + 16], xmm1
|
|
||||||
lea edx, [edx + 32]
|
|
||||||
sub ecx, 8
|
|
||||||
jg convertloop
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
|
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
@ -1929,6 +2105,47 @@ void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts one row of planar Y/U/V to 4-byte pixels stored in memory as
// R, G, B, A (the layout this file calls ABGR), with alpha set to 0xff.
// Naked function: arguments are read from the stack; esi and edi are saved
// and restored around the body.
//   y_buf, u_buf, v_buf  source planes, consumed by the READYUV422 macro.
//   abgr_buf             destination; stored with movdqa, so must be 16-byte
//                        aligned.
//   width                pixel count.  Every pass writes 8 pixels (32 bytes)
//                        and the loop repeats while the remaining count is
//                        > 0, so at least one pass always runs.
// READYUV422 and YUVTORGB are macros defined elsewhere in this file; going by
// the weave below they are expected to leave B in xmm0, G in xmm1 and R in
// xmm2 - confirm against the macro definitions.
__declspec(naked) __declspec(align(16))
void I422ToABGRRow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* abgr_buf,
                         int width) {
  __asm {
    push       esi
    push       edi
    mov        ecx, [esp + 8 + 20]  // width
    mov        edx, [esp + 8 + 16]  // abgr
    mov        edi, [esp + 8 + 12]  // V
    mov        esi, [esp + 8 + 8]   // U
    mov        eax, [esp + 8 + 4]   // Y
    sub        edi, esi             // edi = v_buf - u_buf
    pxor       xmm4, xmm4           // all zero
    pcmpeqb    xmm5, xmm5           // all ones: 0xff alpha bytes

    align      16
 i422toabgrloop:
    READYUV422
    YUVTORGB

    // Step 3: Weave into ABGR (bytes R, G, B, A in memory).
    punpcklbw  xmm2, xmm1           // RG
    punpcklbw  xmm0, xmm5           // BA
    movdqa     xmm1, xmm2
    punpcklwd  xmm2, xmm0           // RGBA bytes, first 4 pixels
    punpckhwd  xmm1, xmm0           // RGBA bytes, next 4 pixels
    movdqa     [edx], xmm2
    movdqa     [edx + 16], xmm1
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         i422toabgrloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
|
void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
@ -1969,6 +2186,89 @@ void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts one row of planar Y/U/V to 4-byte pixels stored in memory as
// A, B, G, R (the layout this file calls RGBA), with alpha set to 0xff.
// Naked function: arguments are read from the stack; esi and edi are saved
// and restored around the body.
//   y_buf, u_buf, v_buf  source planes, consumed by the READYUV422 macro.
//   rgba_buf             destination; stored with movdqa, so must be 16-byte
//                        aligned.
//   width                pixel count.  Every pass writes 8 pixels (32 bytes)
//                        and the loop repeats while the remaining count is
//                        > 0, so at least one pass always runs.
// READYUV422 and YUVTORGB are macros defined elsewhere in this file; going by
// the weave below they are expected to leave B in xmm0, G in xmm1 and R in
// xmm2 - confirm against the macro definitions.
__declspec(naked) __declspec(align(16))
void I422ToRGBARow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgba_buf,
                         int width) {
  __asm {
    push       esi
    push       edi
    mov        ecx, [esp + 8 + 20]  // width
    mov        edx, [esp + 8 + 16]  // rgba
    mov        edi, [esp + 8 + 12]  // V
    mov        esi, [esp + 8 + 8]   // U
    mov        eax, [esp + 8 + 4]   // Y
    sub        edi, esi             // edi = v_buf - u_buf
    pxor       xmm4, xmm4           // all zero

    align      16
 i422torgbaloop:
    READYUV422
    YUVTORGB

    // Step 3: Weave into RGBA (bytes A, B, G, R in memory).
    // xmm5 is the destination of the interleave below, so the alpha bytes
    // are regenerated on every pass.
    pcmpeqb    xmm5, xmm5           // all ones: 0xff alpha bytes
    punpcklbw  xmm1, xmm2           // GR
    punpcklbw  xmm5, xmm0           // AB
    movdqa     xmm0, xmm5
    punpcklwd  xmm5, xmm1           // ABGR bytes, first 4 pixels
    punpckhwd  xmm0, xmm1           // ABGR bytes, next 4 pixels
    movdqa     [edx], xmm5
    movdqa     [edx + 16], xmm0
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         i422torgbaloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||||
|
|
||||||
|
// Same conversion as I422ToRGBARow_SSSE3, but the destination is written
// with movdqu, so rgba_buf does not need 16-byte alignment.
// Naked function: arguments are read from the stack; esi and edi are saved
// and restored around the body.
//   y_buf, u_buf, v_buf  source planes, consumed by the READYUV422 macro.
//   rgba_buf             destination, 4 bytes per pixel (A, B, G, R in
//                        memory, alpha = 0xff).
//   width                pixel count.  Every pass writes 8 pixels (32 bytes)
//                        and the loop repeats while the remaining count is
//                        > 0, so at least one pass always runs.
// READYUV422 and YUVTORGB are macros defined elsewhere in this file; going by
// the weave below they are expected to leave B in xmm0, G in xmm1 and R in
// xmm2 - confirm against the macro definitions.
__declspec(naked) __declspec(align(16))
void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
                                   const uint8* u_buf,
                                   const uint8* v_buf,
                                   uint8* rgba_buf,
                                   int width) {
  __asm {
    push       esi
    push       edi
    mov        ecx, [esp + 8 + 20]  // width
    mov        edx, [esp + 8 + 16]  // rgba
    mov        edi, [esp + 8 + 12]  // V
    mov        esi, [esp + 8 + 8]   // U
    mov        eax, [esp + 8 + 4]   // Y
    sub        edi, esi             // edi = v_buf - u_buf
    pxor       xmm4, xmm4           // all zero

    align      16
 i422torgbaloop:
    READYUV422
    YUVTORGB

    // Step 3: Weave into RGBA (bytes A, B, G, R in memory).
    // xmm5 is the destination of the interleave below, so the alpha bytes
    // are regenerated on every pass.
    pcmpeqb    xmm5, xmm5           // all ones: 0xff alpha bytes
    punpcklbw  xmm1, xmm2           // GR
    punpcklbw  xmm5, xmm0           // AB
    movdqa     xmm0, xmm5
    punpcklwd  xmm5, xmm1           // ABGR bytes, first 4 pixels
    punpckhwd  xmm0, xmm1           // ABGR bytes, next 4 pixels
    movdqu     [edx], xmm5
    movdqu     [edx + 16], xmm0
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         i422torgbaloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||||
|
|
||||||
#endif // HAS_I422TOARGBROW_SSSE3
|
#endif // HAS_I422TOARGBROW_SSSE3
|
||||||
|
|
||||||
#ifdef HAS_YTOARGBROW_SSE2
|
#ifdef HAS_YTOARGBROW_SSE2
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user