mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
Linear interpolation.
BUG=none TEST=*Linear* R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/3689004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@848 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
c2a889eb55
commit
788f757016
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 847
|
||||
Version: 848
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -22,7 +22,8 @@ extern "C" {
|
||||
enum FilterMode {
|
||||
kFilterNone = 0, // Point sample; Fastest.
|
||||
kFilterBilinear = 1, // Faster than box, but lower quality scaling down.
|
||||
kFilterBox = 2 // Highest quality.
|
||||
kFilterBox = 2, // Highest quality.
|
||||
kFilterLinear = 3 // Faster than bilinear, slower than None.
|
||||
};
|
||||
|
||||
// Scale a YUV plane.
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 847
|
||||
#define LIBYUV_VERSION 848
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
242
source/scale.cc
242
source/scale.cc
@ -37,19 +37,7 @@ static __inline int Half(int v) {
|
||||
// Note: Some SSE2 reference manuals
|
||||
// cpuvol1.pdf agner_instruction_tables.pdf 253666.pdf 253667.pdf
|
||||
|
||||
// Set the following flag to true to revert to only
|
||||
// using the reference implementation ScalePlaneBox(), and
|
||||
// NOT the optimized versions. Useful for debugging and
|
||||
// when comparing the quality of the resulting YUV planes
|
||||
// as produced by the optimized and non-optimized versions.
|
||||
static bool use_reference_impl_ = false;
|
||||
|
||||
LIBYUV_API
|
||||
void SetUseReferenceImpl(bool use) {
|
||||
use_reference_impl_ = use;
|
||||
}
|
||||
|
||||
// ScaleRowDown2Int also used by planar functions
|
||||
// ScaleRowDown2Box also used by planar functions
|
||||
// NEON downscalers with interpolation.
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||
@ -208,6 +196,44 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
// Blends 32x1 rectangle to 16x1.
// Each output byte is the rounded average of two horizontally adjacent
// source bytes. Only a single source row is read; src_stride is never loaded.
|
||||
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
// Each pass of the loop consumes 32 source bytes and stores 16 output bytes,
// and the loop body always runs at least once.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_ptr
|
||||
// src_stride (argument at [esp + 8] is intentionally not loaded)
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||
psrlw xmm5, 8
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
|
||||
movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
|
||||
psrlw xmm0, 8 // odd bytes as 16 bit words
|
||||
movdqa xmm3, xmm1
|
||||
psrlw xmm1, 8
|
||||
pand xmm2, xmm5 // even bytes as 16 bit words
|
||||
pand xmm3, xmm5
|
||||
pavgw xmm0, xmm2 // rounded average of each even/odd pair
|
||||
pavgw xmm1, xmm3
|
||||
packuswb xmm0, xmm1 // pack the 16 word results back to bytes
|
||||
|
||||
sub ecx, 16 // 16 output bytes per pass; sets flags for jg
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
jg wloop // movdqa/lea do not modify flags, so this tests the sub
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Blends 32x2 rectangle to 16x1.
|
||||
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
@ -281,6 +307,44 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
// Blends 32x1 rectangle to 16x1.
// Each output byte is the rounded average of two horizontally adjacent
// source bytes. Only a single source row is read; the stride is never loaded.
|
||||
// Alignment requirement: none. Loads and stores use movdqu, so src_ptr and
// dst_ptr may be unaligned.
// Each pass of the loop consumes 32 source bytes and stores 16 output bytes,
// and the loop body always runs at least once.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_ptr
|
||||
// src_stride (argument at [esp + 8] is intentionally not loaded)
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||
psrlw xmm5, 8
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
|
||||
movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
|
||||
psrlw xmm0, 8 // odd bytes as 16 bit words
|
||||
movdqa xmm3, xmm1
|
||||
psrlw xmm1, 8
|
||||
pand xmm2, xmm5 // even bytes as 16 bit words
|
||||
pand xmm3, xmm5
|
||||
pavgw xmm0, xmm2 // rounded average of each even/odd pair
|
||||
pavgw xmm1, xmm3
|
||||
packuswb xmm0, xmm1 // pack the 16 word results back to bytes
|
||||
|
||||
sub ecx, 16 // 16 output bytes per pass; sets flags for jg
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
jg wloop // movdqu/lea do not modify flags, so this tests the sub
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Blends 32x2 rectangle to 16x1.
|
||||
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
@ -838,6 +902,40 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrlw $0x8,%%xmm5 \n"
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"pand %%xmm5,%%xmm2 \n"
|
||||
"pand %%xmm5,%%xmm3 \n"
|
||||
"pavgw %%xmm2,%%xmm0 \n"
|
||||
"pavgw %%xmm3,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
@ -903,6 +1001,40 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
// Blends 32x1 rectangle to 16x1: each output byte is the rounded average
// ((a + b + 1) >> 1) of two horizontally adjacent source bytes.
// The stride argument is unused because only one source row is read.
// Alignment requirement: none; loads and stores use movdqu.
// Each pass consumes 32 source bytes and stores 16 output bytes; the loop
// body always runs at least once and repeats while dst_width stays positive
// after subtracting 16.
static void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
                                               ptrdiff_t /* src_stride */,
                                               uint8* dst_ptr, int dst_width) {
  asm volatile (
    "pcmpeqb   %%xmm5,%%xmm5                   \n"  // mask 0x00ff00ff
    "psrlw     $0x8,%%xmm5                     \n"
    ".p2align  4                               \n"
  "1:                                          \n"
    "movdqu    (%0),%%xmm0                     \n"
    "movdqu    0x10(%0),%%xmm1                 \n"
    "lea       0x20(%0),%0                     \n"
    "movdqa    %%xmm0,%%xmm2                   \n"
    "psrlw     $0x8,%%xmm0                     \n"  // odd bytes as words
    "movdqa    %%xmm1,%%xmm3                   \n"
    "psrlw     $0x8,%%xmm1                     \n"
    "pand      %%xmm5,%%xmm2                   \n"  // even bytes as words
    "pand      %%xmm5,%%xmm3                   \n"
    "pavgw     %%xmm2,%%xmm0                   \n"  // rounded average
    "pavgw     %%xmm3,%%xmm1                   \n"
    "packuswb  %%xmm1,%%xmm0                   \n"
    "movdqu    %%xmm0,(%1)                     \n"
    "lea       0x10(%1),%1                     \n"
    "sub       $0x10,%2                        \n"
    "jg        1b                              \n"
  : "+r"(src_ptr),    // %0
    "+r"(dst_ptr),    // %1
    "+r"(dst_width)   // %2
  :
  : "memory", "cc"
#if defined(__SSE2__)
    // xmm2 and xmm3 are overwritten as scratch copies above, so they must be
    // declared; otherwise the compiler may keep live values in them.
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
  );
}
|
||||
|
||||
static void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
@ -1447,6 +1579,21 @@ static void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
||||
}
|
||||
}
|
||||
|
||||
// Scales one row down by 2 horizontally with linear filtering: each output
// byte is the rounded average ((a + b + 1) >> 1) of two adjacent source bytes.
//   src_ptr    source row; 2 * dst_width bytes are read.
//   src_stride unused (only a single row is sampled).
//   dst        destination row; exactly dst_width bytes are written.
//   dst_width  number of output bytes; values <= 0 write nothing.
// Output is produced two bytes per pass, with a single trailing byte handled
// separately for odd widths. A pass only runs when two outputs remain, so
// widths below 2 no longer write past the end of dst.
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst, int dst_width) {
  const uint8* s = src_ptr;
  (void)src_stride;
  if (dst_width <= 0) {
    return;
  }
  for (int x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}
|
||||
|
||||
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
const uint8* s = src_ptr;
|
||||
@ -1685,7 +1832,9 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
||||
FilterMode filtering) {
|
||||
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) =
|
||||
filtering ? ScaleRowDown2Box_C : ScaleRowDown2_C;
|
||||
filtering == kFilterNone ? ScaleRowDown2_C :
|
||||
(filtering == kFilterLinear ? ScaleRowDown2Linear_C :
|
||||
ScaleRowDown2Box_C);
|
||||
int row_stride = src_stride << 1;
|
||||
if (!filtering) {
|
||||
src_ptr += src_stride; // Point to odd rows.
|
||||
@ -1698,12 +1847,15 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#elif defined(HAS_SCALEROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
|
||||
ScaleRowDown2 = filtering ? ScaleRowDown2Box_Unaligned_SSE2 :
|
||||
ScaleRowDown2_Unaligned_SSE2;
|
||||
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
|
||||
ScaleRowDown2Box_Unaligned_SSE2);
|
||||
if (IS_ALIGNED(src_ptr, 16) &&
|
||||
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
|
||||
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleRowDown2 = filtering ? ScaleRowDown2Box_SSE2 : ScaleRowDown2_SSE2;
|
||||
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
|
||||
ScaleRowDown2Box_SSE2);
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
|
||||
@ -1715,6 +1867,9 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
// TODO(fbarchard): Loop through source height to allow odd height.
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
@ -1759,6 +1914,9 @@ static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
src_ptr += row_stride;
|
||||
@ -1822,14 +1980,15 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#endif
|
||||
|
||||
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
|
||||
for (int y = 0; y < dst_height - 2; y += 3) {
|
||||
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
ScaleRowDown34_0(src_ptr + src_stride, -src_stride,
|
||||
ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
|
||||
dst_ptr, dst_width);
|
||||
src_ptr += src_stride * 2;
|
||||
dst_ptr += dst_stride;
|
||||
@ -1837,7 +1996,7 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
|
||||
|
||||
// Remainder 1 or 2 rows with last row vertically unfiltered
|
||||
if ((dst_height % 3) == 2) {
|
||||
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
|
||||
@ -1914,21 +2073,22 @@ static void ScalePlaneDown38(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#endif
|
||||
|
||||
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
|
||||
for (int y = 0; y < dst_height - 2; y += 3) {
|
||||
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride * 3;
|
||||
dst_ptr += dst_stride;
|
||||
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride * 3;
|
||||
dst_ptr += dst_stride;
|
||||
ScaleRowDown38_2(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride * 2;
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
|
||||
// Remainder 1 or 2 rows with last row vertically unfiltered
|
||||
if ((dst_height % 3) == 2) {
|
||||
ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
|
||||
src_ptr += src_stride * 3;
|
||||
dst_ptr += dst_stride;
|
||||
ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
|
||||
@ -2080,7 +2240,8 @@ SAFEBUFFERS
|
||||
void ScalePlaneBilinear(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_ptr, uint8* dst_ptr) {
|
||||
const uint8* src_ptr, uint8* dst_ptr,
|
||||
FilterMode filtering) {
|
||||
assert(dst_width > 0);
|
||||
assert(dst_height > 0);
|
||||
assert(Abs(src_width) <= kMaxStride);
|
||||
@ -2164,10 +2325,14 @@ void ScalePlaneBilinear(int src_width, int src_height,
|
||||
y = max_y;
|
||||
}
|
||||
int yi = y >> 16;
|
||||
int yf = (y >> 8) & 255;
|
||||
const uint8* src = src_ptr + yi * src_stride;
|
||||
InterpolateRow(row, src, src_stride, src_width, yf);
|
||||
ScaleFilterCols_C(dst_ptr, row, dst_width, x, dx);
|
||||
if (filtering == kFilterLinear) {
|
||||
ScaleFilterCols_C(dst_ptr, src, dst_width, x, dx);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(row, src, src_stride, src_width, yf);
|
||||
ScaleFilterCols_C(dst_ptr, row, dst_width, x, dx);
|
||||
}
|
||||
dst_ptr += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@ -2219,15 +2384,11 @@ static void ScalePlaneAnySize(int src_width, int src_height,
|
||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
||||
} else {
|
||||
ScalePlaneBilinear(src_width, src_height, dst_width, dst_height,
|
||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
||||
src_stride, dst_stride, src_ptr, dst_ptr, filtering);
|
||||
}
|
||||
}
|
||||
|
||||
// Scale plane down, any size
|
||||
//
|
||||
// This is an optimized version for scaling down a plane to any size.
|
||||
// The current implementation is ~10 times faster compared to the
|
||||
// reference implementation for e.g. XGA->LowResPAL
|
||||
|
||||
static void ScalePlaneDown(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
@ -2237,10 +2398,11 @@ static void ScalePlaneDown(int src_width, int src_height,
|
||||
if (!filtering || src_width > kMaxStride) {
|
||||
ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
|
||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
||||
} else if (filtering == kFilterBilinear || dst_height * 2 > src_height) {
|
||||
} else if (filtering == kFilterBilinear || filtering == kFilterLinear ||
|
||||
dst_height * 2 > src_height) {
|
||||
// Bilinear or linear requested, or scaling between 1/2x and 1x: use bilinear
|
||||
ScalePlaneBilinear(src_width, src_height, dst_width, dst_height,
|
||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
||||
src_stride, dst_stride, src_ptr, dst_ptr, filtering);
|
||||
} else {
|
||||
ScalePlaneBox(src_width, src_height, dst_width, dst_height,
|
||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
||||
@ -2271,12 +2433,8 @@ void ScalePlane(const uint8* src, int src_stride,
|
||||
0, 0, dy, 1, filtering);
|
||||
} else if (dst_width <= Abs(src_width) && dst_height <= src_height) {
|
||||
// Scale down.
|
||||
if (use_reference_impl_) {
|
||||
// For testing, allow the optimized versions to be disabled.
|
||||
ScalePlaneDown(src_width, src_height, dst_width, dst_height,
|
||||
src_stride, dst_stride, src, dst, filtering);
|
||||
} else if (4 * dst_width == 3 * src_width &&
|
||||
4 * dst_height == 3 * src_height) {
|
||||
if (4 * dst_width == 3 * src_width &&
|
||||
4 * dst_height == 3 * src_height) {
|
||||
// optimized, 3/4
|
||||
ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
|
||||
src_stride, dst_stride, src, dst, filtering);
|
||||
|
||||
@ -74,6 +74,36 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// Blends 8x1 rectangle to 4x1.
// Each output ARGB pixel is the per-byte rounded average of two horizontally
// adjacent source pixels. Only a single source row is read.
|
||||
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
// Each pass of the loop consumes 32 source bytes (8 pixels) and stores
// 16 output bytes (4 pixels); the loop body always runs at least once.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
static void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t /* src_stride */,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_argb
|
||||
// src_stride ignored
|
||||
mov edx, [esp + 12] // dst_argb
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
movdqa xmm2, xmm0 // keep a copy; the first shufps overwrites xmm0
|
||||
shufps xmm0, xmm1, 0x88 // even pixels
|
||||
shufps xmm2, xmm1, 0xdd // odd pixels
|
||||
pavgb xmm0, xmm2 // per-byte rounded average of even and odd pixels
|
||||
sub ecx, 4 // 4 output pixels per pass; sets flags for jg
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
jg wloop // movdqa/lea do not modify flags, so this tests the sub
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Blends 8x2 rectangle to 4x1.
|
||||
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
@ -466,6 +496,35 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
);
|
||||
}
|
||||
|
||||
static void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t /* src_stride */,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
asm volatile (
|
||||
".p2align 4 \n"
|
||||
BUNDLEALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"shufps $0x88,%%xmm1,%%xmm0 \n"
|
||||
"shufps $0xdd,%%xmm1,%%xmm2 \n"
|
||||
"pavgb %%xmm2,%%xmm0 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"movdqa %%xmm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(dst_width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
@ -822,6 +881,19 @@ static void ScaleARGBRowDown2_C(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// Scales one ARGB row down by 2 horizontally with linear filtering.
// Every output pixel is the per-channel rounded average of a pair of
// neighbouring source pixels: (a + b + 1) >> 1 for each of the 4 bytes.
// Reads 8 * dst_width source bytes and writes 4 * dst_width output bytes.
// The stride argument is unused because only one source row is sampled.
static void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                                      ptrdiff_t /* src_stride */,
                                      uint8* dst_argb, int dst_width) {
  const uint8* in = src_argb;
  uint8* out = dst_argb;
  int remaining = dst_width;
  while (remaining-- > 0) {
    // Channels are processed in byte order 0..3, left pixel at in[c] and
    // right pixel at in[c + 4].
    for (int c = 0; c < 4; ++c) {
      out[c] = (in[c] + in[c + 4] + 1) >> 1;
    }
    in += 8;
    out += 4;
  }
}
|
||||
|
||||
static void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
for (int x = 0; x < dst_width; ++x) {
|
||||
@ -930,13 +1002,16 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) =
|
||||
filtering ? ScaleARGBRowDown2Box_C : ScaleARGBRowDown2_C;
|
||||
filtering == kFilterNone ? ScaleARGBRowDown2_C :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
|
||||
ScaleARGBRowDown2Box_C);
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_SSE2 :
|
||||
ScaleARGBRowDown2_SSE2;
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
|
||||
ScaleARGBRowDown2Box_SSE2);
|
||||
}
|
||||
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
|
||||
@ -946,7 +1021,9 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO(fbarchard): Loop through source height to allow odd height.
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
@ -985,6 +1062,9 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
@ -998,7 +1078,8 @@ static void ScaleARGBBilinearDown(int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
int x, int dx, int y, int dy,
|
||||
FilterMode filtering) {
|
||||
assert(src_height > 0);
|
||||
assert(dst_width > 0);
|
||||
assert(dst_height > 0);
|
||||
@ -1076,10 +1157,14 @@ static void ScaleARGBBilinearDown(int src_height,
|
||||
y = max_y;
|
||||
}
|
||||
int yi = y >> 16;
|
||||
int yf = (y >> 8) & 255;
|
||||
const uint8* src = src_argb + yi * src_stride;
|
||||
InterpolateRow(row, src, src_stride, clip_src_width, yf);
|
||||
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
|
||||
if (filtering == kFilterLinear) {
|
||||
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(row, src, src_stride, clip_src_width, yf);
|
||||
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
|
||||
}
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@ -1091,7 +1176,8 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
int x, int dx, int y, int dy,
|
||||
FilterMode filtering) {
|
||||
assert(src_width > 0);
|
||||
assert(src_height > 0);
|
||||
assert(dst_width > 0);
|
||||
@ -1180,8 +1266,12 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
src += src_stride;
|
||||
}
|
||||
}
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
|
||||
if (filtering == kFilterLinear) {
|
||||
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
|
||||
}
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@ -1200,7 +1290,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
int x, int dx, int y, int dy,
|
||||
FilterMode filtering) {
|
||||
assert(src_width > 0);
|
||||
assert(src_height > 0);
|
||||
assert(dst_width > 0);
|
||||
@ -1353,8 +1444,12 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
}
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
|
||||
if (filtering == kFilterLinear) {
|
||||
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
|
||||
}
|
||||
dst_argb += dst_stride_argb;
|
||||
y += dy;
|
||||
}
|
||||
@ -1424,14 +1519,14 @@ static void ScaleARGBAnySize(int src_width, int src_height,
|
||||
ScaleARGBBilinearUp(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src_argb, dst_argb,
|
||||
x, dx, y, dy);
|
||||
x, dx, y, dy, filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering && src_width * 4 < kMaxStride) {
|
||||
ScaleARGBBilinearDown(src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src_argb, dst_argb,
|
||||
x, dx, y, dy);
|
||||
x, dx, y, dy, filtering);
|
||||
return;
|
||||
}
|
||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
|
||||
|
||||
@ -213,18 +213,20 @@ static int ARGBClipTestFilter(int src_width, int src_height,
|
||||
// Test a scale factor with 3 filters. Expect unfiltered to be exact, but
|
||||
// filtering is different fixed point implementations for SSSE3, Neon and C.
|
||||
#define TEST_FACTOR(name, hfactor, vfactor) \
|
||||
TEST_FACTOR1(name, None, hfactor, vfactor, 2) \
|
||||
TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \
|
||||
TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2)
|
||||
|
||||
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
||||
// TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
||||
TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||
// TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
// TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
// TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
// TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
// TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
#undef TEST_FACTOR1
|
||||
#undef TEST_FACTOR
|
||||
|
||||
@ -257,14 +259,15 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
// Test scale to a specified size with all 3 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 2) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 2)
|
||||
|
||||
TEST_SCALETO(ARGBScale, 640, 360)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 853, 480)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 1280, 720)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 1280, 800)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 1366, 768)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 1920, 1080)
|
||||
TEST_SCALETO(ARGBScale, 853, 480)
|
||||
TEST_SCALETO(ARGBScale, 1280, 720)
|
||||
TEST_SCALETO(ARGBScale, 1280, 800)
|
||||
TEST_SCALETO(ARGBScale, 1366, 768)
|
||||
TEST_SCALETO(ARGBScale, 1920, 1080)
|
||||
#undef TEST_SCALETO1
|
||||
#undef TEST_SCALETO
|
||||
|
||||
|
||||
@ -141,23 +141,24 @@ static int TestFilter(int src_width, int src_height,
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
// Test a scale factor with all 3 filters. Expect unfiltered to be exact, but
|
||||
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
||||
// filtering is different fixed point implementations for SSSE3, Neon and C.
|
||||
#define TEST_FACTOR(name, hfactor, vfactor) \
|
||||
TEST_FACTOR1(name, None, hfactor, vfactor, 0) \
|
||||
TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \
|
||||
TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) \
|
||||
TEST_FACTOR1(name, Box, hfactor, vfactor, 2) \
|
||||
|
||||
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
||||
// TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
||||
TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||
// TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
// TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
// TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
// TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
// TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
#undef TEST_FACTOR1
|
||||
#undef TEST_FACTOR
|
||||
|
||||
@ -175,17 +176,19 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
// Test scale to a specified size with all 3 filters.
|
||||
// Test scale to a specified size with all 4 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 2) \
|
||||
TEST_SCALETO1(name, width, height, Box, 2)
|
||||
|
||||
TEST_SCALETO(Scale, 640, 360)
|
||||
TEST_SCALETO(DISABLED_Scale, 853, 480)
|
||||
TEST_SCALETO(DISABLED_Scale, 1280, 720)
|
||||
TEST_SCALETO(DISABLED_Scale, 1280, 800)
|
||||
TEST_SCALETO(DISABLED_Scale, 1366, 768)
|
||||
TEST_SCALETO(DISABLED_Scale, 1920, 1080)
|
||||
TEST_SCALETO(Scale, 853, 480)
|
||||
TEST_SCALETO(Scale, 1280, 720)
|
||||
TEST_SCALETO(Scale, 1280, 800)
|
||||
TEST_SCALETO(Scale, 1366, 768)
|
||||
TEST_SCALETO(Scale, 1920, 1080)
|
||||
#undef TEST_SCALETO1
|
||||
#undef TEST_SCALETO
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user