diff --git a/source/scale_common.cc b/source/scale_common.cc index 3507aa4d9..e9c4eff93 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -421,7 +421,7 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, #define BLENDER(a, b, f) (uint8)((int)(a) + \ ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) #else -// inteluses 7 bit math with rounding. +// Intel uses 7 bit math with rounding. #define BLENDER(a, b, f) (uint8)((int)(a) + \ (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) #endif diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc index e2f88544b..1ab5a7627 100644 --- a/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -867,7 +867,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, "pshufb %%xmm5,%%xmm1 \n" "punpcklwd %%xmm4,%%xmm0 \n" "psubb %8,%%xmm0 \n" // make pixels signed. - "pxor %%xmm6,%%xmm1 \n" // 128 -f = (f ^ 127 ) + 1 + "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127) + 1 "paddusb %%xmm7,%%xmm1 \n" "pmaddubsw %%xmm0,%%xmm1 \n" "pextrw $0x1,%%xmm2,%k3 \n" diff --git a/source/scale_neon.cc b/source/scale_neon.cc index 44b0c8080..cb0a6ee5c 100644 --- a/source/scale_neon.cc +++ b/source/scale_neon.cc @@ -572,9 +572,9 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, MEMACCESS(6) \ "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n" -// The NEON version mimics this formula: -// #define BLENDER(a, b, f) (uint8)((int)(a) + -// ((int)(f) * ((int)(b) - (int)(a)) >> 16)) +// The NEON version mimics this formula (from scale_common.cc): +// #define BLENDER(a, b, f) (uint8)((int)(a) + +// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index ff277f26f..2362f0874 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -587,6 +587,10 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, MEMACCESS(6) \ 
"ld2 {v4.b, v5.b}["#n"], [%6] \n" +// The NEON version mimics this formula (from scale_common.cc): +// #define BLENDER(a, b, f) (uint8)((int)(a) + +// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) + void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; @@ -626,8 +630,8 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "ushll2 v6.4s, v6.8h, #0 \n" "mul v16.4s, v16.4s, v7.4s \n" "mul v17.4s, v17.4s, v6.4s \n" - "rshrn v6.4h, v16.4s, #16 \n" - "rshrn2 v6.8h, v17.4s, #16 \n" + "rshrn v6.4h, v16.4s, #16 \n" + "rshrn2 v6.8h, v17.4s, #16 \n" "add v4.8h, v4.8h, v6.8h \n" "xtn v4.8b, v4.8h \n"