mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
SSSE3 version using pshufb for ARGBAttenuateRow_SSSE3
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/490011 git-svn-id: http://libyuv.googlecode.com/svn/trunk@243 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
8ed54222e7
commit
f2c86d01cc
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 242
|
Version: 243
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 242
|
#define LIBYUV_VERSION 243
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
|
|||||||
@ -893,6 +893,13 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
|
|||||||
ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
|
ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_ARGBATTENUATE_SSSE3)
|
||||||
|
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
|
||||||
|
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||||
|
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||||
|
ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
ARGBAttenuateRow(src_argb, dst_argb, width);
|
ARGBAttenuateRow(src_argb, dst_argb, width);
|
||||||
|
|||||||
@ -69,6 +69,11 @@ extern "C" {
|
|||||||
#define HAS_ARGBATTENUATE_SSE2
|
#define HAS_ARGBATTENUATE_SSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// The following are available on Windows 32 bit
|
||||||
|
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||||
|
#define HAS_ARGBATTENUATE_SSSE3
|
||||||
|
#endif
|
||||||
|
|
||||||
// The following are available on Neon platforms
|
// The following are available on Neon platforms
|
||||||
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)
|
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)
|
||||||
#define HAS_MIRRORROW_NEON
|
#define HAS_MIRRORROW_NEON
|
||||||
@ -363,6 +368,7 @@ void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
|||||||
|
|
||||||
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
|||||||
@ -2334,8 +2334,58 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // HAS_ARGBATTENUATE_SSE2
|
#endif // HAS_ARGBATTENUATE_SSE2
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBATTENUATE_SSSE3
|
||||||
|
// Shuffle table duplicating alpha
// pshufb control used by ARGBAttenuateRow_SSSE3 for the first 2 of 4 pixels.
// Source byte 3 (alpha of pixel 0) is copied into bytes 0-5 and source byte 7
// (alpha of pixel 1) into bytes 8-13, i.e. the three colour word lanes of each
// pixel hold the alpha byte in both halves. 128u has bit 7 set, which makes
// pshufb write zero, so the fourth (alpha) word lane of each pixel is cleared.
|
||||||
|
static const uvec8 kShuffleAlpha0 = {
|
||||||
|
3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
|
||||||
|
};
|
||||||
|
// pshufb control used by ARGBAttenuateRow_SSSE3 for the last 2 of 4 pixels.
// Source byte 11 (alpha of pixel 2) is copied into bytes 0-5 and source byte
// 15 (alpha of pixel 3) into bytes 8-13; 128u entries (bit 7 set) make pshufb
// write zero into the alpha word lane of each pixel.
static const uvec8 kShuffleAlpha1 = {
|
||||||
|
11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
|
||||||
|
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
|
||||||
|
};
|
||||||
|
// Attenuates a row of ARGB pixels: multiplies the three colour bytes of each
// pixel by that pixel's alpha byte and keeps the alpha byte unchanged.
// Processes 4 pixels (16 bytes) per loop iteration.
//
//   src_argb  source row; read with movdqa, so it must be 16 byte aligned.
//   dst_argb  destination row; written with movdqa, so it must be 16 byte
//             aligned.
//   width     number of pixels. The loop body runs before the count is
//             tested, so at least 4 pixels are always processed, and it
//             continues while the remaining count is > 0 (signed).
//             NOTE(review): callers are expected to pass a positive multiple
//             of 4 - confirm against the dispatch in ARGBAttenuate.
//
// Per colour byte c with alpha a the result is
//   (((c * 0x0101) * (a * 0x0101)) >> 16) >> 8
// which approximates c * a / 255.
//
// Naked function: there is no compiler generated prologue/epilogue, so the
// arguments are read directly from the stack relative to esp.
// Registers written: eax, edx, ecx, xmm0-xmm5 and flags.
__declspec(naked) __declspec(align(16))
|
||||||
|
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||||
|
__asm {
|
||||||
|
mov eax, [esp + 4] // src_argb
|
||||||
|
mov edx, [esp + 8] // dst_argb
|
||||||
|
mov ecx, [esp + 12] // width
|
||||||
|
sub edx, eax // edx = dst - src, so [eax + edx] addresses dst
|
||||||
|
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
|
||||||
|
pslld xmm3, 24 // all ones shifted left: keeps only the top (alpha) byte of each dword
|
||||||
|
movdqa xmm4, kShuffleAlpha0 // pshufb control for pixels 0-1
|
||||||
|
movdqa xmm5, kShuffleAlpha1 // pshufb control for pixels 2-3
|
||||||
|
|
||||||
|
align 16
|
||||||
|
convertloop: // each iteration converts 4 pixels at [eax]
|
||||||
|
movdqa xmm0, [eax] // read 4 pixels
|
||||||
|
pshufb xmm0, xmm4 // isolate first 2 alphas: a * 0x0101 in colour words, 0 in alpha word
|
||||||
|
movdqa xmm1, [eax] // read 4 pixels
|
||||||
|
punpcklbw xmm1, xmm1 // first 2 pixel rgbs: each byte doubled into a word (c * 0x0101)
|
||||||
|
pmulhuw xmm0, xmm1 // rgb * a: high 16 bits of the unsigned product
|
||||||
|
movdqa xmm1, [eax] // read 4 pixels
|
||||||
|
pshufb xmm1, xmm5 // isolate next 2 alphas
|
||||||
|
movdqa xmm2, [eax] // read 4 pixels
|
||||||
|
punpckhbw xmm2, xmm2 // next 2 pixel rgbs
|
||||||
|
pmulhuw xmm1, xmm2 // rgb * a
|
||||||
|
movdqa xmm2, [eax] // mask original alpha
|
||||||
|
pand xmm2, xmm3
|
||||||
|
psrlw xmm0, 8 // scale pixels 0-1 down to 8 bit results
|
||||||
|
psrlw xmm1, 8 // scale pixels 2-3 down to 8 bit results
|
||||||
|
packuswb xmm0, xmm1 // words back to bytes: 4 pixels, alpha bytes are 0 here
|
||||||
|
por xmm0, xmm2 // copy original alpha
|
||||||
|
sub ecx, 4 // 4 pixels done; sets the flags tested by jg below
|
||||||
|
movdqa [eax + edx], xmm0 // store 4 pixels to dst (does not modify flags)
|
||||||
|
lea eax, [eax + 16] // advance src by 16 bytes; lea leaves flags intact
|
||||||
|
jg convertloop // loop while remaining width > 0 (flags from sub ecx, 4)
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HAS_ARGBATTENUATE_SSSE3
|
||||||
|
|
||||||
#endif // _M_IX86
|
#endif // _M_IX86
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user