mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
I400ToARGB_AVX2 port from SSE2 to AVX2.
BUG=403 TESTED=libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*I400ToARGB* R=brucedawson@google.com Review URL: https://webrtc-codereview.appspot.com/46569004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1322 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f5a7b2b48a
commit
685b92b0a6
@ -200,6 +200,7 @@ extern "C" {
|
||||
#define HAS_I422TOARGB4444ROW_AVX2
|
||||
#define HAS_I444TOARGBROW_AVX2
|
||||
#define HAS_I411TOARGBROW_AVX2
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
// TODO(fbarchard): Port to Neon
|
||||
#define HAS_ARGBTORGB565DITHERROW_SSE2
|
||||
#define HAS_ARGBTORGB565DITHERROW_AVX2
|
||||
@ -935,9 +936,11 @@ void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
|
||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
|
||||
void I444ToARGBRow_C(const uint8* src_y,
|
||||
|
||||
@ -349,6 +349,14 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I400ToARGBRow = I400ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I400ToARGBRow = I400ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I400ToARGBRow = I400ToARGBRow_Any_NEON;
|
||||
|
||||
@ -192,6 +192,9 @@ RGBANY(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, ARGBToARGB4444Row_C,
|
||||
#if defined(HAS_I400TOARGBROW_SSE2)
|
||||
RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, I400ToARGBRow_C, 1, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_AVX2)
|
||||
RGBANY(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, I400ToARGBRow_C, 1, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YTOARGBROW_SSE2)
|
||||
RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C, 1, 4, 7)
|
||||
#endif
|
||||
|
||||
@ -284,6 +284,38 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_I400TOARGBROW_AVX2
// Expands a row of 8-bit gray (Y) samples into 32-bit pixels: every Y byte is
// copied into the three low bytes of its pixel and the top byte is forced to
// 0xff (opaque alpha).
//
// Naked 32-bit function, so the arguments are read directly off the stack:
//   [esp + 4]  src_y     source row of Y bytes
//   [esp + 8]  dst_argb  destination row, 4 bytes per pixel
//   [esp + 12] pix       number of pixels to convert
//
// Each pass reads 16 source bytes and writes 64 destination bytes. The loop
// body runs before the count is tested, so at least one pass always executes;
// it repeats while the remaining count is still greater than zero.
// Registers used: eax, ecx, edx, ymm0, ymm1, ymm5.
__declspec(naked) __declspec(align(16))
void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix) {
  __asm {
    mov        ecx, [esp + 12]      // ecx = pix (pixels remaining)
    mov        edx, [esp + 8]       // edx = dst_argb
    mov        eax, [esp + 4]       // eax = src_y
    vpcmpeqb   ymm5, ymm5, ymm5     // ymm5 = all ones
    vpslld     ymm5, ymm5, 24       // ymm5 = 0xff000000 in every dword

  expand16:
    vmovdqu    xmm0, [eax]          // load 16 Y bytes (unaligned load)
    vpermq     ymm0, ymm0, 0xd8     // swap middle qwords: 8 Y bytes in the
                                    // low half of each 128-bit lane
    vpunpcklbw ymm0, ymm0, ymm0     // double each byte: Y0 Y0 Y1 Y1 ...
    vpermq     ymm0, ymm0, 0xd8     // regroup qwords so the word unpacks
                                    // below come out in pixel order
    vpunpckhwd ymm1, ymm0, ymm0     // ymm1 = pixels 8..15, Y repeated 4x
    vpunpcklwd ymm0, ymm0, ymm0     // ymm0 = pixels 0..7,  Y repeated 4x
    vpor       ymm1, ymm1, ymm5     // force top byte of each pixel to 0xff
    vpor       ymm0, ymm0, ymm5
    vmovdqu    [edx], ymm0          // store pixels 0..7
    vmovdqu    [edx + 32], ymm1     // store pixels 8..15
    lea        eax, [eax + 16]      // src_y    += 16 bytes
    lea        edx, [edx + 64]      // dst_argb += 64 bytes
    sub        ecx, 16              // 16 pixels done
    jg         expand16             // loop while pixels remain
    vzeroupper                      // clear upper ymm state before returning
    ret
  }
}
#endif  // HAS_I400TOARGBROW_AVX2
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
|
||||
__asm {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user