rounding for arm filter

R=wangcheng@google.com, harryjin@google.com
BUG=libyuv:607

Review URL: https://codereview.chromium.org/2093913004 .
This commit is contained in:
Frank Barchard 2016-06-24 16:07:49 -07:00
parent 1b3e4aee47
commit b8ddb5a2a7
5 changed files with 10 additions and 12 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1600 Version: 1601
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1600 #define LIBYUV_VERSION 1601
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -417,11 +417,9 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
} }
// (1-f)a + fb can be replaced with a + f(b-a) // (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) #if defined(__arm__) || defined(__aarch64__)
// arm uses 16 bit math with truncation.
// TODO(fbarchard): add rounding.
#define BLENDER(a, b, f) (uint8)((int)(a) + \ #define BLENDER(a, b, f) (uint8)((int)(a) + \
(((int)((f)) * ((int)(b) - (int)(a))) >> 16)) ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else #else
// inteluses 7 bit math with rounding. // inteluses 7 bit math with rounding.
#define BLENDER(a, b, f) (uint8)((int)(a) + \ #define BLENDER(a, b, f) (uint8)((int)(a) + \
@ -480,7 +478,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
// Same as 8 bit arm blender but return is cast to uint16 // Same as 8 bit arm blender but return is cast to uint16
#define BLENDER(a, b, f) (uint16)((int)(a) + \ #define BLENDER(a, b, f) (uint16)((int)(a) + \
(((int)((f)) * ((int)(b) - (int)(a))) >> 16)) ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
@ -818,7 +816,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
} }
} }
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=605. // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender // Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDERC(a, b, f, s) (uint32)( \ #define BLENDERC(a, b, f, s) (uint32)( \

View File

@ -612,8 +612,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"vmovl.u16 q10, d21 \n" "vmovl.u16 q10, d21 \n"
"vmul.s32 q11, q11, q13 \n" "vmul.s32 q11, q11, q13 \n"
"vmul.s32 q12, q12, q10 \n" "vmul.s32 q12, q12, q10 \n"
"vshrn.s32 d18, q11, #16 \n" "vrshrn.s32 d18, q11, #16 \n"
"vshrn.s32 d19, q12, #16 \n" "vrshrn.s32 d19, q12, #16 \n"
"vadd.s16 q8, q8, q9 \n" "vadd.s16 q8, q8, q9 \n"
"vmovn.s16 d6, q8 \n" "vmovn.s16 d6, q8 \n"

View File

@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
"ushll2 v6.4s, v6.8h, #0 \n" "ushll2 v6.4s, v6.8h, #0 \n"
"mul v16.4s, v16.4s, v7.4s \n" "mul v16.4s, v16.4s, v7.4s \n"
"mul v17.4s, v17.4s, v6.4s \n" "mul v17.4s, v17.4s, v6.4s \n"
"shrn v6.4h, v16.4s, #16 \n" "rshrn v6.4h, v16.4s, #16 \n"
"shrn2 v6.8h, v17.4s, #16 \n" "rshrn2 v6.8h, v17.4s, #16 \n"
"add v4.8h, v4.8h, v6.8h \n" "add v4.8h, v4.8h, v6.8h \n"
"xtn v4.8b, v4.8h \n" "xtn v4.8b, v4.8h \n"