From 0e83b64e8879e9469919dc96b5d970c7c5bd05af Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Tue, 7 Jul 2015 17:48:04 -0700
Subject: [PATCH] scalerow avx2 bug fix. was using ymm2 instead of ymm3.

R=harryjin@google.com
BUG=libyuv:462

Review URL: https://webrtc-codereview.appspot.com/56639004.
---
 include/libyuv/scale_row.h |  3 +--
 source/scale_win.cc        | 10 ++++------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index c117d7a3f..23b2471fd 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -50,8 +50,7 @@ extern "C" {
 
 // The following are available on VS2012:
 #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-// Some AVX2 versions disabled. See libyuv bug 462.
-// #define HAS_SCALEADDROW_AVX2
+#define HAS_SCALEADDROW_AVX2
 #define HAS_SCALEROWDOWN2_AVX2
 #define HAS_SCALEROWDOWN4_AVX2
 #endif
diff --git a/source/scale_win.cc b/source/scale_win.cc
index 92773a39f..102f33edb 100644
--- a/source/scale_win.cc
+++ b/source/scale_win.cc
@@ -838,17 +838,15 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
   // sum rows
   xloop:
     vmovdqu    ymm3, [eax]       // read 32 bytes
-    vpermq     ymm3, ymm2, 0xd8  // unmutate for vpunpck
     lea        eax, [eax + 32]
-    vmovdqu    ymm0, [edx]       // read 32 words from destination
-    vmovdqu    ymm1, [edx + 32]
+    vpermq     ymm3, ymm3, 0xd8  // unmutate for vpunpck
     vpunpcklbw ymm2, ymm3, ymm5
     vpunpckhbw ymm3, ymm3, ymm5
-    vpaddusw   ymm0, ymm0, ymm2  // sum 16 words
-    vpaddusw   ymm1, ymm1, ymm3
+    vpaddusw   ymm0, ymm2, [edx] // sum 16 words
+    vpaddusw   ymm1, ymm3, [edx + 32]
     vmovdqu    [edx], ymm0       // write 32 words to destination
     vmovdqu    [edx + 32], ymm1
-    lea       edx, [edx + 64]
+    lea        edx, [edx + 64]
     sub        ecx, 32
     jg         xloop
 