diff --git a/README.chromium b/README.chromium index ca11605ae..181e653ea 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1600 +Version: 1601 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 6434a8dbd..896d1d9b7 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1600 +#define LIBYUV_VERSION 1601 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_common.cc b/source/scale_common.cc index baed70b9d..3507aa4d9 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -417,11 +417,9 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, } // (1-f)a + fb can be replaced with a + f(b-a) -#if defined(__arm__) -// arm uses 16 bit math with truncation. -// TODO(fbarchard): add rounding. +#if defined(__arm__) || defined(__aarch64__) #define BLENDER(a, b, f) (uint8)((int)(a) + \ - (((int)((f)) * ((int)(b) - (int)(a))) >> 16)) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) #else // inteluses 7 bit math with rounding. #define BLENDER(a, b, f) (uint8)((int)(a) + \ @@ -480,7 +478,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, // Same as 8 bit arm blender but return is cast to uint16 #define BLENDER(a, b, f) (uint16)((int)(a) + \ - (((int)((f)) * ((int)(b) - (int)(a))) >> 16)) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, int dst_width, int x, int dx) { @@ -818,7 +816,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, } } -// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=605. +// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607. // Mimics SSSE3 blender #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 #define BLENDERC(a, b, f, s) (uint32)( \ diff --git a/source/scale_neon.cc b/source/scale_neon.cc index 26bb70592..44b0c8080 100644 --- a/source/scale_neon.cc +++ b/source/scale_neon.cc @@ -612,8 +612,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "vmovl.u16 q10, d21 \n" "vmul.s32 q11, q11, q13 \n" "vmul.s32 q12, q12, q10 \n" - "vshrn.s32 d18, q11, #16 \n" - "vshrn.s32 d19, q12, #16 \n" + "vrshrn.s32 d18, q11, #16 \n" + "vrshrn.s32 d19, q12, #16 \n" "vadd.s16 q8, q8, q9 \n" "vmovn.s16 d6, q8 \n" diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index 3a62db5b8..ff277f26f 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -626,8 +626,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "ushll2 v6.4s, v6.8h, #0 \n" "mul v16.4s, v16.4s, v7.4s \n" "mul v17.4s, v17.4s, v6.4s \n" - "shrn v6.4h, v16.4s, #16 \n" - "shrn2 v6.8h, v17.4s, #16 \n" + "rshrn v6.4h, v16.4s, #16 \n" + "rshrn2 v6.8h, v17.4s, #16 \n" "add v4.8h, v4.8h, v6.8h \n" "xtn v4.8b, v4.8h \n"