mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
rounding for arm filter
R=wangcheng@google.com, harryjin@google.com
BUG=libyuv:607
Review URL: https://codereview.chromium.org/2093913004
This commit is contained in:
parent
1b3e4aee47
commit
b8ddb5a2a7
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1600
|
Version: 1601
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1600
|
#define LIBYUV_VERSION 1601
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -417,11 +417,9 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// (1-f)a + fb can be replaced with a + f(b-a)
|
// (1-f)a + fb can be replaced with a + f(b-a)
|
||||||
#if defined(__arm__)
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
// arm uses 16 bit math with truncation.
|
|
||||||
// TODO(fbarchard): add rounding.
|
|
||||||
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
||||||
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
|
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||||
#else
|
#else
|
||||||
// intel uses 7 bit math with rounding.
|
// intel uses 7 bit math with rounding.
|
||||||
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
||||||
@ -480,7 +478,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
|
|
||||||
// Same as 8 bit arm blender but return is cast to uint16
|
// Same as 8 bit arm blender but return is cast to uint16
|
||||||
#define BLENDER(a, b, f) (uint16)((int)(a) + \
|
#define BLENDER(a, b, f) (uint16)((int)(a) + \
|
||||||
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
|
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||||
|
|
||||||
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||||
int dst_width, int x, int dx) {
|
int dst_width, int x, int dx) {
|
||||||
@ -818,7 +816,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=605.
|
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
|
||||||
// Mimics SSSE3 blender
|
// Mimics SSSE3 blender
|
||||||
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
|
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
|
||||||
#define BLENDERC(a, b, f, s) (uint32)( \
|
#define BLENDERC(a, b, f, s) (uint32)( \
|
||||||
|
|||||||
@ -612,8 +612,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
"vmovl.u16 q10, d21 \n"
|
"vmovl.u16 q10, d21 \n"
|
||||||
"vmul.s32 q11, q11, q13 \n"
|
"vmul.s32 q11, q11, q13 \n"
|
||||||
"vmul.s32 q12, q12, q10 \n"
|
"vmul.s32 q12, q12, q10 \n"
|
||||||
"vshrn.s32 d18, q11, #16 \n"
|
"vrshrn.s32 d18, q11, #16 \n"
|
||||||
"vshrn.s32 d19, q12, #16 \n"
|
"vrshrn.s32 d19, q12, #16 \n"
|
||||||
"vadd.s16 q8, q8, q9 \n"
|
"vadd.s16 q8, q8, q9 \n"
|
||||||
"vmovn.s16 d6, q8 \n"
|
"vmovn.s16 d6, q8 \n"
|
||||||
|
|
||||||
|
|||||||
@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
"ushll2 v6.4s, v6.8h, #0 \n"
|
"ushll2 v6.4s, v6.8h, #0 \n"
|
||||||
"mul v16.4s, v16.4s, v7.4s \n"
|
"mul v16.4s, v16.4s, v7.4s \n"
|
||||||
"mul v17.4s, v17.4s, v6.4s \n"
|
"mul v17.4s, v17.4s, v6.4s \n"
|
||||||
"shrn v6.4h, v16.4s, #16 \n"
|
"rshrn v6.4h, v16.4s, #16 \n"
|
||||||
"shrn2 v6.8h, v17.4s, #16 \n"
|
"rshrn2 v6.8h, v17.4s, #16 \n"
|
||||||
"add v4.8h, v4.8h, v6.8h \n"
|
"add v4.8h, v4.8h, v6.8h \n"
|
||||||
"xtn v4.8b, v4.8h \n"
|
"xtn v4.8b, v4.8h \n"
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user