From 50f8cb2db33f563b290a2831b3aecbf357a44e32 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 18 Nov 2015 13:05:39 -0800 Subject: [PATCH] port I411 movzx 2 byte reader to gcc Previously the I411 format used movd to read U, V pixels. But this reads 4 bytes, and can cause a memory exception. pinsrw can be used, but fails on drmemory 1.5, and is slow. So in this change a movzw is used to read 2 bytes into EBX, which are then copied to xmm0 with movd. Slightly slower, but no memory exception. Was LibYUVConvertTest.I411ToARGB_Opt (577 ms) Now LibYUVConvertTest.I411ToARGB_Opt (608 ms) TBR=harryjin@google.com BUG=libyuv:525 Review URL: https://codereview.chromium.org/1457783004 . --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row_gcc.cc | 20 ++++++++++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/README.chromium b/README.chromium index 8c136c06a..ef2a1d76b 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1540 +Version: 1541 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 01b4f6620..d9eff38ff 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1540 +#define LIBYUV_VERSION 1541 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_gcc.cc b/source/row_gcc.cc index dab651e7e..732d83b84 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1518,10 +1518,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" -// Read 2 UV from 411, upsample to 8 UV -#define READYUV411 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ +// Read 2 UV from 411, upsample to 8 UV. +// reading 4 bytes is an msan violation.
+// "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" +// MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) +// pinsrw fails with drmemory +// __asm pinsrw xmm0, [esi], 0 /* U */ +// __asm pinsrw xmm1, [esi + edi], 0 /* V */ +#define READYUV411_EBX \ + "movzw " MEMACCESS([u_buf]) ",%%ebx \n" \ + "movd %%ebx,%%xmm0 \n" \ + MEMOPREG(movzw,0x00,[u_buf],[v_buf],1,ebx) " \n" \ + "movd %%ebx,%%xmm1 \n" \ "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ "punpcklbw %%xmm1,%%xmm0 \n" \ "punpcklwd %%xmm0,%%xmm0 \n" \ @@ -1800,7 +1808,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" - READYUV411 + READYUV411_EBX YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" @@ -1811,7 +1819,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", "ebx", NACL_R14 YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); }