mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-10 02:36:46 +08:00
MergeUV AVX2: use vextractf128 to store results and avoid shuffling.
BUG=none TESTED=intel sde on unittests R=brucedawson@google.com Review URL: https://webrtc-codereview.appspot.com/33369004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1178 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
147f7b70f5
commit
ef14972df0
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1177
|
||||
Version: 1178
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1177
|
||||
#define LIBYUV_VERSION 1178
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -2446,8 +2446,6 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
|
||||
}
|
||||
#endif // HAS_SPLITUVROW_SSE2
|
||||
|
||||
// TODO(fbarchard): Consider vpunpcklbw, vpunpckhbw, store-low1, store-low2,
|
||||
// extract-high1, extract-high2.
|
||||
#ifdef HAS_MERGEUVROW_AVX2
|
||||
void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
int width) {
|
||||
@ -2458,13 +2456,12 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
|
||||
"vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
|
||||
"vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vperm2i128 $0x20,%%ymm0,%%ymm2,%%ymm1 \n"
|
||||
"vperm2i128 $0x31,%%ymm0,%%ymm2,%%ymm2 \n"
|
||||
"vmovdqu %%ymm1," MEMACCESS(2) " \n"
|
||||
"vmovdqu %%ymm2," MEMACCESS2(0x20,2) " \n"
|
||||
"vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n"
|
||||
"vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n"
|
||||
"vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n"
|
||||
"vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n"
|
||||
"lea " MEMLEA(0x40,2) ",%2 \n"
|
||||
"sub $0x20,%3 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
@ -2686,10 +2686,10 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
lea eax, [eax + 32]
|
||||
vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
|
||||
vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
|
||||
vperm2i128 ymm1, ymm2, ymm0, 0x20 // low 128 of ymm2 and low 128 of ymm0
|
||||
vperm2i128 ymm2, ymm2, ymm0, 0x31 // high 128 of ymm2 and high 128 of ymm0
|
||||
vmovdqu [edi], ymm1
|
||||
vmovdqu [edi + 32], ymm2
|
||||
vextractf128 [edi], ymm2, 0 // bytes 0..15
|
||||
vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
|
||||
vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
|
||||
vextractf128 [edi + 48], ymm0, 1 // bytes 48..63
|
||||
lea edi, [edi + 64]
|
||||
sub ecx, 32
|
||||
jg convertloop
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user