mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
YUV scale filter columns improved filtering accuracy
upscale a YUV image. observe change in hue.. green especially. disable ScaleFilterCols_SSSE3, falling back on ScaleFilterCols_C observe hue.. green especially, is better. was ScaleFrom1280x720_Bilinear (1620 ms) now ScaleFrom1280x720_Bilinear (1907 ms) BUG=libyuv:605 TEST=try bots R=harryjin@google.com, wangcheng@google.com Review URL: https://codereview.chromium.org/2084533006 .
This commit is contained in:
parent
24b9fa6671
commit
cc88adc620
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1599
|
||||
Version: 1600
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -61,8 +61,7 @@ extern "C" {
|
||||
#define HAS_SCALEARGBROWDOWN2_SSE2
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
|
||||
#define HAS_SCALECOLSUP2_SSE2
|
||||
// TODO(fbarchard): HAS_SCALEFILTERCOLS_SSSE3 doesnt match C very well.
|
||||
// #define HAS_SCALEFILTERCOLS_SSSE3
|
||||
#define HAS_SCALEFILTERCOLS_SSSE3
|
||||
#define HAS_SCALEROWDOWN2_SSSE3
|
||||
#define HAS_SCALEROWDOWN34_SSSE3
|
||||
#define HAS_SCALEROWDOWN38_SSSE3
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1599
|
||||
#define LIBYUV_VERSION 1600
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -417,8 +417,16 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
}
|
||||
|
||||
// (1-f)a + fb can be replaced with a + f(b-a)
|
||||
#if defined(__arm__)
|
||||
// arm uses 16 bit math with truncation.
|
||||
// TODO(fbarchard): add rounding.
|
||||
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
||||
((int)(f) * ((int)(b) - (int)(a)) >> 16))
|
||||
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
|
||||
#else
|
||||
// inteluses 7 bit math with rounding.
|
||||
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
||||
(((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
|
||||
#endif
|
||||
|
||||
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
@ -470,8 +478,9 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
#undef BLENDER
|
||||
|
||||
// Same as 8 bit arm blender but return is cast to uint16
|
||||
#define BLENDER(a, b, f) (uint16)((int)(a) + \
|
||||
((int)(f) * ((int)(b) - (int)(a)) >> 16))
|
||||
(((int)((f)) * ((int)(b) - (int)(a))) >> 16))
|
||||
|
||||
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
@ -809,6 +818,7 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=605.
|
||||
// Mimics SSSE3 blender
|
||||
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
|
||||
#define BLENDERC(a, b, f, s) (uint32)( \
|
||||
|
||||
@ -821,6 +821,16 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
}
|
||||
#endif // HAS_SCALEADDROW_AVX2
|
||||
|
||||
// Constant for making pixels signed to avoid pmaddubsw
|
||||
// saturation.
|
||||
static uvec8 kFsub80 =
|
||||
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
|
||||
|
||||
// Constant for making pixels unsigned and adding .5 for rounding.
|
||||
static uvec16 kFadd40 =
|
||||
{ 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };
|
||||
|
||||
// Bilinear column filtering. SSSE3 version.
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
@ -831,7 +841,10 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"movl $0x04040000,%k2 \n"
|
||||
"movd %k2,%%xmm5 \n"
|
||||
"pcmpeqb %%xmm6,%%xmm6 \n"
|
||||
"psrlw $0x9,%%xmm6 \n"
|
||||
"psrlw $0x9,%%xmm6 \n" // 0x007f007f
|
||||
"pcmpeqb %%xmm7,%%xmm7 \n"
|
||||
"psrlw $15,%%xmm7 \n" // 0x00010001
|
||||
|
||||
"pextrw $0x1,%%xmm2,%k3 \n"
|
||||
"subl $0x2,%5 \n"
|
||||
"jl 29f \n"
|
||||
@ -853,13 +866,16 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"movd %k2,%%xmm4 \n"
|
||||
"pshufb %%xmm5,%%xmm1 \n"
|
||||
"punpcklwd %%xmm4,%%xmm0 \n"
|
||||
"pxor %%xmm6,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm1,%%xmm0 \n"
|
||||
"psubb %8,%%xmm0 \n" // make pixels signed.
|
||||
"pxor %%xmm6,%%xmm1 \n" // 128 -f = (f ^ 127 ) + 1
|
||||
"paddusb %%xmm7,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm0,%%xmm1 \n"
|
||||
"pextrw $0x1,%%xmm2,%k3 \n"
|
||||
"pextrw $0x3,%%xmm2,%k4 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%k2 \n"
|
||||
"paddw %9,%%xmm1 \n" // make pixels unsigned.
|
||||
"psrlw $0x7,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm1 \n"
|
||||
"movd %%xmm1,%k2 \n"
|
||||
"mov %w2," MEMACCESS(0) " \n"
|
||||
"lea " MEMLEA(0x2,0) ",%0 \n"
|
||||
"sub $0x2,%5 \n"
|
||||
@ -873,11 +889,14 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"movd %k2,%%xmm0 \n"
|
||||
"psrlw $0x9,%%xmm2 \n"
|
||||
"pshufb %%xmm5,%%xmm2 \n"
|
||||
"psubb %8,%%xmm0 \n" // make pixels signed.
|
||||
"pxor %%xmm6,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm2,%%xmm0 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%k2 \n"
|
||||
"paddusb %%xmm7,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm0,%%xmm2 \n"
|
||||
"paddw %9,%%xmm2 \n" // make pixels unsigned.
|
||||
"psrlw $0x7,%%xmm2 \n"
|
||||
"packuswb %%xmm2,%%xmm2 \n"
|
||||
"movd %%xmm2,%k2 \n"
|
||||
"mov %b2," MEMACCESS(0) " \n"
|
||||
"99: \n"
|
||||
: "+r"(dst_ptr), // %0
|
||||
@ -887,9 +906,16 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"=&r"(x1), // %4
|
||||
"+rm"(dst_width) // %5
|
||||
: "rm"(x), // %6
|
||||
"rm"(dx) // %7
|
||||
"rm"(dx), // %7
|
||||
#if defined(__x86_64__)
|
||||
"x"(kFsub80), // %8
|
||||
"x"(kFadd40) // %9
|
||||
#else
|
||||
"m"(kFsub80), // %8
|
||||
"m"(kFadd40) // %9
|
||||
#endif
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -572,6 +572,10 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
MEMACCESS(6) \
|
||||
"vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
|
||||
|
||||
// The NEON version mimics this formula:
|
||||
// #define BLENDER(a, b, f) (uint8)((int)(a) +
|
||||
// ((int)(f) * ((int)(b) - (int)(a)) >> 16))
|
||||
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
int dx_offset[4] = {0, 1, 2, 3};
|
||||
|
||||
@ -860,6 +860,16 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
}
|
||||
#endif // HAS_SCALEADDROW_AVX2
|
||||
|
||||
// Constant for making pixels signed to avoid pmaddubsw
|
||||
// saturation.
|
||||
static uvec8 kFsub80 =
|
||||
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
|
||||
|
||||
// Constant for making pixels unsigned and adding .5 for rounding.
|
||||
static uvec16 kFadd40 =
|
||||
{ 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };
|
||||
|
||||
// Bilinear column filtering. SSSE3 version.
|
||||
__declspec(naked)
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
@ -877,6 +887,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
movd xmm5, eax
|
||||
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
|
||||
psrlw xmm6, 9
|
||||
pcmpeqb xmm7, xmm7 // generate 0x0001
|
||||
psrlw xmm7, 15
|
||||
pextrw eax, xmm2, 1 // get x0 integer. preroll
|
||||
sub ecx, 2
|
||||
jl xloop29
|
||||
@ -899,20 +911,22 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
movd xmm4, ebx
|
||||
pshufb xmm1, xmm5 // 0011
|
||||
punpcklwd xmm0, xmm4
|
||||
psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
|
||||
pxor xmm1, xmm6 // 0..7f and 7f..0
|
||||
pmaddubsw xmm0, xmm1 // 16 bit, 2 pixels.
|
||||
paddusb xmm1, xmm7 // +1 so 0..7f and 80..1
|
||||
pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels.
|
||||
pextrw eax, xmm2, 1 // get x0 integer. next iteration.
|
||||
pextrw edx, xmm2, 3 // get x1 integer. next iteration.
|
||||
psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm0, xmm0 // 8 bits, 2 pixels.
|
||||
movd ebx, xmm0
|
||||
paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round.
|
||||
psrlw xmm1, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm1, xmm1 // 8 bits, 2 pixels.
|
||||
movd ebx, xmm1
|
||||
mov [edi], bx
|
||||
lea edi, [edi + 2]
|
||||
sub ecx, 2 // 2 pixels
|
||||
jge xloop2
|
||||
|
||||
xloop29:
|
||||
|
||||
add ecx, 2 - 1
|
||||
jl xloop99
|
||||
|
||||
@ -921,11 +935,14 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
movd xmm0, ebx
|
||||
psrlw xmm2, 9 // 7 bit fractions.
|
||||
pshufb xmm2, xmm5 // 0011
|
||||
psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
|
||||
pxor xmm2, xmm6 // 0..7f and 7f..0
|
||||
pmaddubsw xmm0, xmm2 // 16 bit
|
||||
psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm0, xmm0 // 8 bits
|
||||
movd ebx, xmm0
|
||||
paddusb xmm2, xmm7 // +1 so 0..7f and 80..1
|
||||
pmaddubsw xmm2, xmm0 // 16 bit
|
||||
paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round.
|
||||
psrlw xmm2, 7 // 8.7 fixed point to low 8 bits.
|
||||
packuswb xmm2, xmm2 // 8 bits
|
||||
movd ebx, xmm2
|
||||
mov [edi], bl
|
||||
|
||||
xloop99:
|
||||
|
||||
@ -314,10 +314,10 @@ static int TestFilter_16(int src_width, int src_height,
|
||||
|
||||
TEST_FACTOR(2, 1, 2, 0)
|
||||
TEST_FACTOR(4, 1, 4, 0)
|
||||
TEST_FACTOR(8, 1, 8, 3)
|
||||
TEST_FACTOR(8, 1, 8, 0)
|
||||
TEST_FACTOR(3by4, 3, 4, 1)
|
||||
TEST_FACTOR(3by8, 3, 8, 1)
|
||||
TEST_FACTOR(3, 1, 3, 3)
|
||||
TEST_FACTOR(3, 1, 3, 0)
|
||||
#undef TEST_FACTOR1
|
||||
#undef TEST_FACTOR
|
||||
#undef SX
|
||||
@ -356,9 +356,9 @@ TEST_FACTOR(3, 1, 3, 3)
|
||||
// Test scale to a specified size with all 4 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Box, 3)
|
||||
TEST_SCALETO1(name, width, height, Linear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Box, 0)
|
||||
|
||||
TEST_SCALETO(Scale, 1, 1)
|
||||
TEST_SCALETO(Scale, 320, 240)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user