From 48e536431342238d6747ca6911c1772f44d90979 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 4 Dec 2013 03:04:58 +0000 Subject: [PATCH] Use xor/mov bx instead of movzx to avoid drmemory bug BUG=none TEST=none R=johannkoenig@google.com, tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/4879004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@891 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/convert.cc | 10 ++++++---- source/scale_posix.cc | 19 +++++++++++-------- source/scale_win.cc | 16 +++++++++++++--- unit_test/unit_test.cc | 4 ++-- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/README.chromium b/README.chromium index 93d07fbb5..b2dc03f38 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 890 +Version: 891 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 033dbf667..0ef1a3c69 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 890 +#define LIBYUV_VERSION 891 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index eeacb8b46..edd6d274f 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y, // Resample U plane. ScalePlane(src_u, src_stride_u, halfwidth, height, dst_u, dst_stride_u, halfwidth, halfheight, - kFilterNone); + kFilterBilinear); // Resample V plane. ScalePlane(src_v, src_stride_v, halfwidth, height, dst_v, dst_stride_v, halfwidth, halfheight, - kFilterNone); + kFilterBilinear); return 0; } @@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y, // Resample U plane. ScalePlane(src_u, src_stride_u, width, height, dst_u, dst_stride_u, halfwidth, halfheight, - kFilterNone); + kFilterBilinear); // Resample V plane. ScalePlane(src_v, src_stride_v, width, height, dst_v, dst_stride_v, halfwidth, halfheight, - kFilterNone); + kFilterBilinear); return 0; } // 411 chroma is 1/4 width, 1x height // 420 chroma is 1/2 width, 1/2 height +// TODO(fbarchard): Change to kFilterBilinear; Test with valgrind. +// TODO(fbarchard): Share code for 444 and 422 to 420. LIBYUV_API int I411ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, diff --git a/source/scale_posix.cc b/source/scale_posix.cc index 86cc5bf48..03882af4a 100644 --- a/source/scale_posix.cc +++ b/source/scale_posix.cc @@ -108,12 +108,12 @@ static uvec16 kScaleAb2 = #define MEMOPREG(opcode, offset, base, index, scale, reg) \ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ #opcode " (%%r15,%%r14),%%" #reg "\n" -#define MEMOPREGK(opcode, offset, base, index, scale, reg) \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%k" #reg "\n" #define MEMOPMEM(opcode, reg, offset, base, index, scale) \ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ #opcode " %%" #reg ",(%%r15,%%r14)\n" +#define MEMOP(opcode, offset, base, index, scale) \ + "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ + #opcode " (%%r15,%%r14)" #define BUNDLEALIGN ".p2align 5\n" #else #define MEMACCESS(base) "(%" #base ")" @@ -125,10 +125,10 @@ static uvec16 kScaleAb2 = #offset "(%" #base ",%" #index "," #scale ")" #define MEMOPREG(opcode, offset, base, index, scale, reg) \ #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" -#define MEMOPREGK(opcode, offset, base, index, scale, reg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%k" #reg "\n" #define MEMOPMEM(opcode, reg, offset, base, index, scale) \ #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" +#define MEMOP(opcode, offset, base, index, scale) \ + #opcode " " #offset "(%" #base ",%" #index "," #scale ")" #define BUNDLEALIGN #endif @@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, "2: \n" "movdqa %%xmm2,%%xmm1 \n" "paddd %%xmm3,%%xmm2 \n" - MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2 + "xor %2,%2 \n" + MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2 "movd %k2,%%xmm0 \n" "psrlw $0x9,%%xmm1 \n" BUNDLEALIGN - MEMOPREGK(movzwl,0x00,1,4,1,2) // movzwl (%1,%4,1),%k2 + "xor %2,%2 \n" + MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2 "movd %k2,%%xmm4 \n" "pshufb %%xmm5,%%xmm1 \n" "punpcklwd %%xmm4,%%xmm0 \n" @@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, "29: \n" "addl $0x1,%5 \n" "jl 99f \n" - MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2 + "xor %2,%2 \n" + MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2 "movd %k2,%%xmm0 \n" "psrlw $0x9,%%xmm2 \n" "pshufb %%xmm5,%%xmm2 \n" diff --git a/source/scale_win.cc b/source/scale_win.cc index bbf85899f..fe2f84cfa 100644 --- a/source/scale_win.cc +++ b/source/scale_win.cc @@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, // Bilinear column filtering. SSSE3 version. // TODO(fbarchard): Port to Neon +// TODO(fbarchard): Switch the following: +// xor ebx, ebx +// mov bx, word ptr [esi + eax] // 2 source x0 pixels +// To +// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels +// when drmemory bug fixed. +// https://code.google.com/p/drmemory/issues/detail?id=1396 __declspec(naked) __declspec(align(16)) void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, @@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, xloop2: movdqa xmm1, xmm2 // x0, x1 fractions. paddd xmm2, xmm3 // x += dx - movzx ebx, word ptr [esi + eax] // 2 source x0 pixels + xor ebx, ebx + mov bx, word ptr [esi + eax] // 2 source x0 pixels movd xmm0, ebx psrlw xmm1, 9 // 7 bit fractions. - movzx ebx, word ptr [esi + edx] // 2 source x1 pixels + xor ebx, ebx + mov bx, word ptr [esi + edx] // 2 source x1 pixels movd xmm4, ebx pshufb xmm1, xmm5 // 0011 punpcklwd xmm0, xmm4 @@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, jl xloop99 // 1 pixel remainder - movzx ebx, word ptr [esi + eax] // 2 source x0 pixels + xor ebx, ebx + mov bx, word ptr [esi + eax] // 2 source x0 pixels movd xmm0, ebx psrlw xmm2, 9 // 7 bit fractions. pshufb xmm2, xmm5 // 0011 diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc index b11bd2463..d1bbffa4a 100644 --- a/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -19,8 +19,8 @@ #define BENCHMARK_ITERATIONS 1 libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128), - benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(22), - benchmark_height_(14) { + benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33), + benchmark_height_(17) { const char* repeat = getenv("LIBYUV_REPEAT"); if (repeat) { benchmark_iterations_ = atoi(repeat); // NOLINT