Make the MergeUVRow_AVX2 implementation in row_win.cc consistent with row_gcc.cc; this commit fixes an out-of-bounds memory write.

Change-Id: I4b771a46fc853effc4c0fa3ae8032322a8369dc9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4514810
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
shaodiwei 2023-05-09 19:11:14 +08:00 committed by Frank Barchard
parent b2ea744591
commit 4c209d264d

View File

@ -3461,17 +3461,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
sub edx, eax
convertloop:
vmovdqu ymm0, [eax] // read 32 U's
vmovdqu ymm1, [eax + edx] // and 32 V's
lea eax, [eax + 32]
vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
vextractf128 [edi], ymm2, 0 // bytes 0..15
vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
vextractf128 [edi + 48], ymm0, 1 // bytes 47..63
lea edi, [edi + 64]
sub ecx, 32
vpmovzxbw ymm0, [eax]
vpmovzxbw ymm1, [eax + edx]
lea eax, [eax + 16]
vpsllw ymm1, ymm1, 8
vpor ymm2, ymm1, ymm0
vmovdqu [edi], ymm2
lea edi, [edi + 32]
sub ecx, 16
jg convertloop
pop edi