rounding for arm filter

R=wangcheng@google.com, harryjin@google.com
BUG=libyuv:607

Review URL: https://codereview.chromium.org/2093913004 .
This commit is contained in:
Frank Barchard 2016-06-24 16:07:49 -07:00
parent 1b3e4aee47
commit b8ddb5a2a7
5 changed files with 10 additions and 12 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1600
Version: 1601
License: BSD
License File: LICENSE

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1600
#define LIBYUV_VERSION 1601
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -417,11 +417,9 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
}
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__)
// arm uses 16 bit math with truncation.
// TODO(fbarchard): add rounding.
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) (uint8)((int)(a) + \
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// inteluses 7 bit math with rounding.
#define BLENDER(a, b, f) (uint8)((int)(a) + \
@ -480,7 +478,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
// Same as 8 bit arm blender but return is cast to uint16
#define BLENDER(a, b, f) (uint16)((int)(a) + \
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) {
@ -818,7 +816,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
}
}
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=605.
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDERC(a, b, f, s) (uint32)( \

View File

@ -612,8 +612,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"vmovl.u16 q10, d21 \n"
"vmul.s32 q11, q11, q13 \n"
"vmul.s32 q12, q12, q10 \n"
"vshrn.s32 d18, q11, #16 \n"
"vshrn.s32 d19, q12, #16 \n"
"vrshrn.s32 d18, q11, #16 \n"
"vrshrn.s32 d19, q12, #16 \n"
"vadd.s16 q8, q8, q9 \n"
"vmovn.s16 d6, q8 \n"

View File

@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"ushll2 v6.4s, v6.8h, #0 \n"
"mul v16.4s, v16.4s, v7.4s \n"
"mul v17.4s, v17.4s, v6.4s \n"
"shrn v6.4h, v16.4s, #16 \n"
"shrn2 v6.8h, v17.4s, #16 \n"
"rshrn v6.4h, v16.4s, #16 \n"
"rshrn2 v6.8h, v17.4s, #16 \n"
"add v4.8h, v4.8h, v6.8h \n"
"xtn v4.8b, v4.8h \n"