Fix linear interpolation

The C interpolator applied to the chroma plane when scaling NV12 on Mac/ARM
used (0x7f ^ f), which is (127 - f) instead of (128 - f). The two blend
weights therefore summed to 127 rather than 128, so flat colors shifted
(e.g. 128 -> 127) when scaled, causing a visually noticeable difference.

Bug: b/465721312
Change-Id: Iecf5d2ca2a85602de4146cba7e0f64ecb4b2c1fe
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7830198
Reviewed-by: Frank Barchard <fbarchard@google.com>
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Mirko Bonadei <mbonadei@chromium.org>
This commit is contained in:
Sergey Silkin 2026-05-08 09:13:35 +00:00 committed by libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com
parent c6c8689c74
commit 0f320a03f7
2 changed files with 24 additions and 28 deletions

View File

@ -1536,9 +1536,9 @@ void ScaleUVColsUp2_C(uint8_t* dst_uv,
}
}
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
// Computes (a + ((f * (b - a) + 64) >> 7)), which is equivalent to
// ((a * (128 - f) + b * f + 64) >> 7) provided that >> on a negative
// intermediate (b < a) is an arithmetic shift.
#define BLENDER1(a, b, f) ((a) + (((f) * ((b) - (a)) + 64) >> 7))
#define BLENDERC(a, b, f, s) \
(uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)

View File

@ -870,14 +870,8 @@ __declspec(naked) void ScaleAddRow_AVX2(const uint8_t* src_ptr,
}
#endif // HAS_SCALEADDROW_AVX2
// Constant for making pixels signed to avoid pmaddubsw
// saturation.
static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
// Constant for making pixels unsigned and adding .5 for rounding.
static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040,
0x4040, 0x4040, 0x4040, 0x4040};
static const uvec16 kFadd40 = {0x0040, 0x0040, 0x0040, 0x0040,
0x0040, 0x0040, 0x0040, 0x0040};
// Bilinear column filtering. SSSE3 version.
__declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
@ -898,8 +892,8 @@ __declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
movd xmm5, eax
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
psrlw xmm6, 9
pcmpeqb xmm7, xmm7 // generate 0x0001
psrlw xmm7, 15
pcmpeqb xmm7, xmm7 // generate 0x00ff mask for extracting 'a'
psrlw xmm7, 8
pextrw eax, xmm2, 1 // get x0 integer. preroll
sub ecx, 2
jl xloop29
@ -922,16 +916,17 @@ __declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
movd xmm4, ebx
pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4
psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
pxor xmm1, xmm6 // 0..7f and 7f..0
paddusb xmm1, xmm7 // +1 so 0..7f and 80..1
pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels.
movdqa xmm4, xmm0 // Copy pixels.
pmaddubsw xmm0, xmm1 // a * (127 - f) + b * f
pand xmm4, xmm7 // Extract left pixels 'a'.
paddw xmm0, xmm4 // a * 128 + f * (b - a)
pextrw eax, xmm2, 1 // get x0 integer. next iteration.
paddw xmm0, xmmword ptr kFadd40 // add rounding bias 64.
pextrw edx, xmm2, 3 // get x1 integer. next iteration.
paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round.
psrlw xmm1, 7 // 8.7 fixed point to low 8 bits.
packuswb xmm1, xmm1 // 8 bits, 2 pixels.
movd ebx, xmm1
psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
packuswb xmm0, xmm0 // 8 bits, 2 pixels.
movd ebx, xmm0
mov [edi], bx
lea edi, [edi + 2]
sub ecx, 2 // 2 pixels
@ -941,19 +936,20 @@ __declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
add ecx, 2 - 1
jl xloop99
// 1 pixel remainder
// 1 pixel remainder
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011
psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
pxor xmm2, xmm6 // 0..7f and 7f..0
paddusb xmm2, xmm7 // +1 so 0..7f and 80..1
pmaddubsw xmm2, xmm0 // 16 bit
paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round.
psrlw xmm2, 7 // 8.7 fixed point to low 8 bits.
packuswb xmm2, xmm2 // 8 bits
movd ebx, xmm2
movdqa xmm4, xmm0 // Copy pixels.
pmaddubsw xmm0, xmm2 // a * (127 - f) + b * f
pand xmm4, xmm7 // Extract left pixel 'a'.
paddw xmm0, xmm4 // a * 128 + f * (b - a)
paddw xmm0, xmmword ptr kFadd40 // add rounding bias 64.
psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
packuswb xmm0, xmm0 // 8 bits
movd ebx, xmm0
mov [edi], bl
xloop99: