From 4c209d264d5e4b297bab17f7ba0a49ecafa08b98 Mon Sep 17 00:00:00 2001 From: shaodiwei Date: Tue, 9 May 2023 19:11:14 +0800 Subject: [PATCH] MergeUVRow_AVX2 implementation is consistent in row_win.cc and row_gcc.cc; the commit fixes an out-of-bounds memory write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I4b771a46fc853effc4c0fa3ae8032322a8369dc9 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4514810 Reviewed-by: Frank Barchard Commit-Queue: Frank Barchard --- source/row_win.cc | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/source/row_win.cc b/source/row_win.cc index c5a14f86f..ba5e53185 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3461,17 +3461,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u, sub edx, eax convertloop: - vmovdqu ymm0, [eax] // read 32 U's - vmovdqu ymm1, [eax + edx] // and 32 V's - lea eax, [eax + 32] - vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 - vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3 - vextractf128 [edi], ymm2, 0 // bytes 0..15 - vextractf128 [edi + 16], ymm0, 0 // bytes 16..31 - vextractf128 [edi + 32], ymm2, 1 // bytes 32..47 - vextractf128 [edi + 48], ymm0, 1 // bytes 47..63 - lea edi, [edi + 64] - sub ecx, 32 + vpmovzxbw ymm0, [eax] + vpmovzxbw ymm1, [eax + edx] + lea eax, [eax + 16] + vpsllw ymm1, ymm1, 8 + vpor ymm2, ymm1, ymm0 + vmovdqu [edi], ymm2 + lea edi, [edi + 32] + sub ecx, 16 jg convertloop pop edi