vpermq for avx2 ARGB4444ToARGB, ARGB1555ToARGB and RGB565ToARGB

R=harryjin@google.com
BUG=libyuv:462

Review URL: https://webrtc-codereview.appspot.com/52759004.
This commit is contained in:
Frank Barchard 2015-07-07 17:06:04 -07:00
parent 97b35daf75
commit 715a29195b
2 changed files with 7 additions and 4 deletions

View File

@ -186,9 +186,8 @@ extern "C" {
// The following require VS2012. Port to GCC.
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
// Some AVX2 versions disabled. See libyuv bug 462.
// #define HAS_ARGB1555TOARGBROW_AVX2
// #define HAS_ARGB4444TOARGBROW_AVX2
#define HAS_ARGB1555TOARGBROW_AVX2
#define HAS_ARGB4444TOARGBROW_AVX2
#define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
@ -204,7 +203,7 @@ extern "C" {
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TOARGBROW_AVX2
#define HAS_NV21TORGB565ROW_AVX2
// #define HAS_RGB565TOARGBROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#endif
// The following are available on all x86 platforms, but

View File

@ -608,6 +608,8 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8)
vpand ymm2, ymm2, ymm7
vpor ymm0, ymm0, ymm2 // AG
vpermq ymm0, ymm0, 0xd8 // mutate for unpack
vpermq ymm1, ymm1, 0xd8
vpunpckhbw ymm2, ymm1, ymm0
vpunpcklbw ymm1, ymm1, ymm0
vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB
@ -644,6 +646,8 @@ void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
vpsllw ymm1, ymm0, 4
vpor ymm2, ymm2, ymm3
vpor ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8 // mutate for unpack
vpermq ymm2, ymm2, 0xd8
vpunpckhbw ymm1, ymm0, ymm2
vpunpcklbw ymm0, ymm0, ymm2
vmovdqu [eax * 2 + edx], ymm0 // store 8 pixels of ARGB