mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Use xor/mov bx instead of movzx to avoid drmemory bug
BUG=none
TEST=none
R=johannkoenig@google.com, tpsiaki@google.com
Review URL: https://webrtc-codereview.appspot.com/4879004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@891 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
064d2768a8
commit
48e5364313
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 890
|
||||
Version: 891
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 890
|
||||
#define LIBYUV_VERSION 891
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
|
||||
// Resample U plane.
|
||||
ScalePlane(src_u, src_stride_u, halfwidth, height,
|
||||
dst_u, dst_stride_u, halfwidth, halfheight,
|
||||
kFilterNone);
|
||||
kFilterBilinear);
|
||||
|
||||
// Resample V plane.
|
||||
ScalePlane(src_v, src_stride_v, halfwidth, height,
|
||||
dst_v, dst_stride_v, halfwidth, halfheight,
|
||||
kFilterNone);
|
||||
kFilterBilinear);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
|
||||
// Resample U plane.
|
||||
ScalePlane(src_u, src_stride_u, width, height,
|
||||
dst_u, dst_stride_u, halfwidth, halfheight,
|
||||
kFilterNone);
|
||||
kFilterBilinear);
|
||||
|
||||
// Resample V plane.
|
||||
ScalePlane(src_v, src_stride_v, width, height,
|
||||
dst_v, dst_stride_v, halfwidth, halfheight,
|
||||
kFilterNone);
|
||||
kFilterBilinear);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 411 chroma is 1/4 width, 1x height
|
||||
// 420 chroma is 1/2 width, 1/2 height
|
||||
// TODO(fbarchard): Change to kFilterBilinear; Test with valgrind.
|
||||
// TODO(fbarchard): Share code for 444 and 422 to 420.
|
||||
LIBYUV_API
|
||||
int I411ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
|
||||
@ -108,12 +108,12 @@ static uvec16 kScaleAb2 =
|
||||
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
|
||||
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||
#opcode " (%%r15,%%r14),%%" #reg "\n"
|
||||
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
|
||||
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||
#opcode " (%%r15,%%r14),%k" #reg "\n"
|
||||
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
|
||||
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||
#opcode " %%" #reg ",(%%r15,%%r14)\n"
|
||||
#define MEMOP(opcode, offset, base, index, scale) \
|
||||
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||
#opcode " (%%r15,%%r14)"
|
||||
#define BUNDLEALIGN ".p2align 5\n"
|
||||
#else
|
||||
#define MEMACCESS(base) "(%" #base ")"
|
||||
@ -125,10 +125,10 @@ static uvec16 kScaleAb2 =
|
||||
#offset "(%" #base ",%" #index "," #scale ")"
|
||||
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
|
||||
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
|
||||
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
|
||||
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%k" #reg "\n"
|
||||
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
|
||||
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
|
||||
#define MEMOP(opcode, offset, base, index, scale) \
|
||||
#opcode " " #offset "(%" #base ",%" #index "," #scale ")"
|
||||
#define BUNDLEALIGN
|
||||
#endif
|
||||
|
||||
@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"2: \n"
|
||||
"movdqa %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm3,%%xmm2 \n"
|
||||
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2
|
||||
"xor %2,%2 \n"
|
||||
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
|
||||
"movd %k2,%%xmm0 \n"
|
||||
"psrlw $0x9,%%xmm1 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPREGK(movzwl,0x00,1,4,1,2) // movzwl (%1,%4,1),%k2
|
||||
"xor %2,%2 \n"
|
||||
MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2
|
||||
"movd %k2,%%xmm4 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"punpcklwd %%xmm4,%%xmm0 \n"
|
||||
@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"29: \n"
|
||||
"addl $0x1,%5 \n"
|
||||
"jl 99f \n"
|
||||
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2
|
||||
"xor %2,%2 \n"
|
||||
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
|
||||
"movd %k2,%%xmm0 \n"
|
||||
"psrlw $0x9,%%xmm2 \n"
|
||||
"pshufb %%xmm5,%%xmm2 \n"
|
||||
|
||||
@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
// Bilinear column filtering. SSSE3 version.
|
||||
// TODO(fbarchard): Port to Neon
|
||||
// TODO(fbarchard): Switch the following:
|
||||
// xor ebx, ebx
|
||||
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
|
||||
// To
|
||||
// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
|
||||
// once the drmemory bug below is fixed:
|
||||
// https://code.google.com/p/drmemory/issues/detail?id=1396
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
xloop2:
|
||||
movdqa xmm1, xmm2 // x0, x1 fractions.
|
||||
paddd xmm2, xmm3 // x += dx
|
||||
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
|
||||
xor ebx, ebx
|
||||
mov bx, word ptr [esi + eax] // 2 source x0 pixels
|
||||
movd xmm0, ebx
|
||||
psrlw xmm1, 9 // 7 bit fractions.
|
||||
movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
|
||||
xor ebx, ebx
|
||||
mov bx, word ptr [esi + edx] // 2 source x1 pixels
|
||||
movd xmm4, ebx
|
||||
pshufb xmm1, xmm5 // 0011
|
||||
punpcklwd xmm0, xmm4
|
||||
@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
jl xloop99
|
||||
|
||||
// 1 pixel remainder
|
||||
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
|
||||
xor ebx, ebx
|
||||
mov bx, word ptr [esi + eax] // 2 source x0 pixels
|
||||
movd xmm0, ebx
|
||||
psrlw xmm2, 9 // 7 bit fractions.
|
||||
pshufb xmm2, xmm5 // 0011
|
||||
|
||||
@ -19,8 +19,8 @@
|
||||
#define BENCHMARK_ITERATIONS 1
|
||||
|
||||
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(22),
|
||||
benchmark_height_(14) {
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33),
|
||||
benchmark_height_(17) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user