Make the MergeUVRow_AVX2 implementation consistent between row_win.cc and row_gcc.cc; this commit fixes an out-of-bounds memory write.

Change-Id: I4b771a46fc853effc4c0fa3ae8032322a8369dc9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4514810
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
shaodiwei 2023-05-09 19:11:14 +08:00 committed by Frank Barchard
parent b2ea744591
commit 4c209d264d

View File

@ -3461,17 +3461,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
sub edx, eax sub edx, eax
convertloop: convertloop:
vmovdqu ymm0, [eax] // read 32 U's vpmovzxbw ymm0, [eax]
vmovdqu ymm1, [eax + edx] // and 32 V's vpmovzxbw ymm1, [eax + edx]
lea eax, [eax + 32] lea eax, [eax + 16]
vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 vpsllw ymm1, ymm1, 8
vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3 vpor ymm2, ymm1, ymm0
vextractf128 [edi], ymm2, 0 // bytes 0..15 vmovdqu [edi], ymm2
vextractf128 [edi + 16], ymm0, 0 // bytes 16..31 lea edi, [edi + 32]
vextractf128 [edi + 32], ymm2, 1 // bytes 32..47 sub ecx, 16
vextractf128 [edi + 48], ymm0, 1 // bytes 47..63
lea edi, [edi + 64]
sub ecx, 32
jg convertloop jg convertloop
pop edi pop edi