mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
Faster point samplers using row functions and specialized 2x upsampler.
BUG=none TEST=none R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/3859004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@854 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
a2311691c6
commit
e8c74b61d3
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 854
|
Version: 855
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 854
|
#define LIBYUV_VERSION 855
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
268
source/scale.cc
268
source/scale.cc
@ -953,11 +953,76 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define HAS_SCALECOLSUP2_SSE2
|
||||||
|
// Reads 16 pixels, duplicates them and writes 32 pixels.
|
||||||
|
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
|
||||||
|
__declspec(naked) __declspec(align(16))
|
||||||
|
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
|
int dst_width, int /* x */, int /* dx */) {
|
||||||
|
__asm {
|
||||||
|
mov edx, [esp + 4] // dst_ptr
|
||||||
|
mov eax, [esp + 8] // src_ptr
|
||||||
|
mov ecx, [esp + 12] // dst_width
|
||||||
|
|
||||||
|
align 16
|
||||||
|
wloop:
|
||||||
|
movdqa xmm0, [eax]
|
||||||
|
lea eax, [eax + 16]
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
punpcklbw xmm0, xmm0
|
||||||
|
punpckhbw xmm1, xmm1
|
||||||
|
sub ecx, 32
|
||||||
|
movdqa [edx], xmm0
|
||||||
|
movdqa [edx + 16], xmm1
|
||||||
|
lea edx, [edx + 32]
|
||||||
|
jg wloop
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#elif !defined(LIBYUV_DISABLE_X86) && \
|
#elif !defined(LIBYUV_DISABLE_X86) && \
|
||||||
((defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
|
((defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
|
||||||
|
|
||||||
|
// TODO(nfullagar): For Native Client: When new toolchain becomes available,
|
||||||
|
// take advantage of bundle lock / unlock feature. This will reduce the amount
|
||||||
|
// of manual bundle alignment done below, and bundle alignment could even be
|
||||||
|
// moved into each macro that doesn't use %%nacl: such as MEMOPREG.
|
||||||
|
|
||||||
|
#if defined(__native_client__) && defined(__x86_64__)
|
||||||
|
#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
|
||||||
|
#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
|
||||||
|
#define MEMLEA(offset, base) #offset "(%q" #base ")"
|
||||||
|
#define MEMLEA3(offset, index, scale) \
|
||||||
|
#offset "(,%q" #index "," #scale ")"
|
||||||
|
#define MEMLEA4(offset, base, index, scale) \
|
||||||
|
#offset "(%q" #base ",%q" #index "," #scale ")"
|
||||||
|
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
|
||||||
|
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||||
|
#opcode " (%%r15,%%r14),%%" #reg "\n"
|
||||||
|
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
|
||||||
|
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||||
|
#opcode " %%" #reg ",(%%r15,%%r14)\n"
|
||||||
|
#define BUNDLEALIGN ".p2align 5 \n"
|
||||||
|
#else
|
||||||
|
#define MEMACCESS(base) "(%" #base ")"
|
||||||
|
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
|
||||||
|
#define MEMLEA(offset, base) #offset "(%" #base ")"
|
||||||
|
#define MEMLEA3(offset, index, scale) \
|
||||||
|
#offset "(,%" #index "," #scale ")"
|
||||||
|
#define MEMLEA4(offset, base, index, scale) \
|
||||||
|
#offset "(%" #base ",%" #index "," #scale ")"
|
||||||
|
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
|
||||||
|
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
|
||||||
|
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
|
||||||
|
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
|
||||||
|
#define BUNDLEALIGN
|
||||||
|
#endif
|
||||||
|
|
||||||
// GCC versions of row functions are verbatim conversions from Visual C.
|
// GCC versions of row functions are verbatim conversions from Visual C.
|
||||||
// Generated using gcc disassembly on Visual C object file:
|
// Generated using gcc disassembly on Visual C object file:
|
||||||
// objdump -D yuvscaler.obj >yuvscaler.txt
|
// objdump -D yuvscaler.obj >yuvscaler.txt
|
||||||
|
|
||||||
#define HAS_SCALEROWDOWN2_SSE2
|
#define HAS_SCALEROWDOWN2_SSE2
|
||||||
static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
@ -1689,6 +1754,40 @@ static void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads 16 pixels, duplicates them and writes 32 pixels.
|
||||||
|
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
|
||||||
|
#define HAS_SCALECOLSUP2_SSE2
|
||||||
|
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
|
int dst_width, int /* x */, int /* dx */) {
|
||||||
|
asm volatile (
|
||||||
|
".p2align 4 \n"
|
||||||
|
BUNDLEALIGN
|
||||||
|
"1: \n"
|
||||||
|
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||||
|
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||||
|
"movdqa %%xmm0,%%xmm1 \n"
|
||||||
|
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||||
|
"punpckhbw %%xmm1,%%xmm1 \n"
|
||||||
|
"sub $0x20,%2 \n"
|
||||||
|
"movdqa %%xmm0," MEMACCESS(0) " \n"
|
||||||
|
"movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
|
||||||
|
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
|
||||||
|
: "+r"(dst_ptr), // %0
|
||||||
|
"+r"(src_ptr), // %1
|
||||||
|
"+r"(dst_width) // %2
|
||||||
|
:
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__native_client__) && defined(__x86_64__)
|
||||||
|
, "r14"
|
||||||
|
#endif
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#endif // defined(__x86_64__) || defined(__i386__)
|
#endif // defined(__x86_64__) || defined(__i386__)
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
|
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
|
||||||
@ -1876,6 +1975,34 @@ static void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
} while (d < dend);
|
} while (d < dend);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scales a single row of pixels using point sampling.
|
||||||
|
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
|
int dst_width, int x, int dx) {
|
||||||
|
for (int j = 0; j < dst_width - 1; j += 2) {
|
||||||
|
dst_ptr[0] = src_ptr[x >> 16];
|
||||||
|
x += dx;
|
||||||
|
dst_ptr[1] = src_ptr[x >> 16];
|
||||||
|
x += dx;
|
||||||
|
dst_ptr += 2;
|
||||||
|
}
|
||||||
|
if (dst_width & 1) {
|
||||||
|
dst_ptr[0] = src_ptr[x >> 16];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scales a single row of pixels up by 2x using point sampling.
|
||||||
|
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
|
int dst_width, int, int) {
|
||||||
|
for (int j = 0; j < dst_width - 1; j += 2) {
|
||||||
|
dst_ptr[1] = dst_ptr[0] = src_ptr[0];
|
||||||
|
src_ptr += 1;
|
||||||
|
dst_ptr += 2;
|
||||||
|
}
|
||||||
|
if (dst_width & 1) {
|
||||||
|
dst_ptr[0] = src_ptr[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// (1-f)a + fb can be replaced with a + f(b-a)
|
// (1-f)a + fb can be replaced with a + f(b-a)
|
||||||
#define BLENDER(a, b, f) (static_cast<int>(a) + \
|
#define BLENDER(a, b, f) (static_cast<int>(a) + \
|
||||||
((f) * (static_cast<int>(b) - static_cast<int>(a)) >> 16))
|
((f) * (static_cast<int>(b) - static_cast<int>(a)) >> 16))
|
||||||
@ -2484,7 +2611,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
|
|||||||
} else if (dst_height > 1) {
|
} else if (dst_height > 1) {
|
||||||
dy = FixedDiv(src_height - 1, dst_height - 1);
|
dy = FixedDiv(src_height - 1, dst_height - 1);
|
||||||
}
|
}
|
||||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
const int max_y = (src_height - 1) << 16;
|
||||||
for (int j = 0; j < dst_height; ++j) {
|
for (int j = 0; j < dst_height; ++j) {
|
||||||
if (y > max_y) {
|
if (y > max_y) {
|
||||||
y = max_y;
|
y = max_y;
|
||||||
@ -2515,6 +2642,29 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
|
|||||||
assert(dst_width > 0);
|
assert(dst_width > 0);
|
||||||
assert(dst_height > 0);
|
assert(dst_height > 0);
|
||||||
assert(Abs(dst_width) <= kMaxStride);
|
assert(Abs(dst_width) <= kMaxStride);
|
||||||
|
int dx = 0;
|
||||||
|
int dy = 0;
|
||||||
|
int x = 0;
|
||||||
|
int y = 0;
|
||||||
|
if (dst_width <= Abs(src_width)) {
|
||||||
|
dx = FixedDiv(Abs(src_width), dst_width);
|
||||||
|
x = (dx >> 1) - 32768;
|
||||||
|
} else if (dst_width > 1) {
|
||||||
|
dx = FixedDiv(Abs(src_width) - 1, dst_width - 1);
|
||||||
|
}
|
||||||
|
// Negative src_width means horizontally mirror.
|
||||||
|
if (src_width < 0) {
|
||||||
|
x += (dst_width - 1) * dx;
|
||||||
|
dx = -dx;
|
||||||
|
src_width = -src_width;
|
||||||
|
}
|
||||||
|
if (dst_height <= src_height) {
|
||||||
|
dy = FixedDiv(src_height, dst_height);
|
||||||
|
y = (dy >> 1) - 32768;
|
||||||
|
} else if (dst_height > 1) {
|
||||||
|
dy = FixedDiv(src_height - 1, dst_height - 1);
|
||||||
|
}
|
||||||
|
|
||||||
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
|
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||||
InterpolateRow_C;
|
InterpolateRow_C;
|
||||||
@ -2566,36 +2716,25 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
|
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
int dst_width, int x, int dx) = ScaleFilterCols_C;
|
int dst_width, int x, int dx) =
|
||||||
|
filtering ? ScaleFilterCols_C : ScaleCols_C;
|
||||||
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
|
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (filtering && TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleFilterCols = ScaleFilterCols_SSSE3;
|
ScaleFilterCols = ScaleFilterCols_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
int dx = 0;
|
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||||
int dy = 0;
|
ScaleFilterCols = ScaleColsUp2_C;
|
||||||
int x = 0;
|
#if defined(HAS_SCALECOLS_SSE2)
|
||||||
int y = 0;
|
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||||
if (dst_width <= Abs(src_width)) {
|
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||||
dx = FixedDiv(Abs(src_width), dst_width);
|
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||||
x = (dx >> 1) - 32768;
|
ScaleFilterCols = ScaleColsUp2_SSE2;
|
||||||
} else if (dst_width > 1) {
|
}
|
||||||
dx = FixedDiv(Abs(src_width) - 1, dst_width - 1);
|
#endif
|
||||||
}
|
|
||||||
// Negative src_width means horizontally mirror.
|
|
||||||
if (src_width < 0) {
|
|
||||||
x += (dst_width - 1) * dx;
|
|
||||||
dx = -dx;
|
|
||||||
src_width = -src_width;
|
|
||||||
}
|
|
||||||
if (dst_height <= src_height) {
|
|
||||||
dy = FixedDiv(src_height, dst_height);
|
|
||||||
y = (dy >> 1) - 32768;
|
|
||||||
} else if (dst_height > 1) {
|
|
||||||
dy = FixedDiv(src_height - 1, dst_height - 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
const int max_y = (src_height - 1) << 16;
|
||||||
if (y > max_y) {
|
if (y > max_y) {
|
||||||
y = max_y;
|
y = max_y;
|
||||||
}
|
}
|
||||||
@ -2616,7 +2755,11 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
|
|||||||
for (int j = 0; j < dst_height; ++j) {
|
for (int j = 0; j < dst_height; ++j) {
|
||||||
yi = y >> 16;
|
yi = y >> 16;
|
||||||
if (yi != lasty) {
|
if (yi != lasty) {
|
||||||
if (y <= max_y) {
|
if (y > max_y) {
|
||||||
|
y = max_y;
|
||||||
|
yi = y >> 16;
|
||||||
|
}
|
||||||
|
if (yi != lasty) {
|
||||||
ScaleFilterCols(rowptr, src, dst_width, x, dx);
|
ScaleFilterCols(rowptr, src, dst_width, x, dx);
|
||||||
rowptr += rowstride;
|
rowptr += rowstride;
|
||||||
rowstride = -rowstride;
|
rowstride = -rowstride;
|
||||||
@ -2635,7 +2778,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scale plane to/from any dimensions, without interpolation.
|
// Scale Plane to/from any dimensions, without interpolation.
|
||||||
// Fixed point math is used for performance: The upper 16 bits
|
// Fixed point math is used for performance: The upper 16 bits
|
||||||
// of x and dx is the integer part of the source position and
|
// of x and dx is the integer part of the source position and
|
||||||
// the lower 16 bits are the fixed decimal part.
|
// the lower 16 bits are the fixed decimal part.
|
||||||
@ -2654,47 +2797,27 @@ static void ScalePlaneSimple(int src_width, int src_height,
|
|||||||
dx = -dx;
|
dx = -dx;
|
||||||
src_width = -src_width;
|
src_width = -src_width;
|
||||||
}
|
}
|
||||||
|
void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
for (int j = 0; j < dst_height; ++j) {
|
int dst_width, int x, int dx) = ScaleCols_C;
|
||||||
int xs = x;
|
if (src_width * 2 == dst_width && x < 0x8000) {
|
||||||
int yi = y >> 16;
|
ScaleCols = ScaleColsUp2_C;
|
||||||
const uint8* src = src_ptr + yi * src_stride;
|
#if defined(HAS_SCALECOLS_SSE2)
|
||||||
uint8* dst = dst_ptr;
|
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||||
for (int i = 0; i < dst_width; ++i) {
|
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||||
*dst++ = src[xs >> 16];
|
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||||
xs += dx;
|
ScaleCols = ScaleColsUp2_SSE2;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < dst_height; ++i) {
|
||||||
|
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
|
||||||
|
dst_width, x, dx);
|
||||||
dst_ptr += dst_stride;
|
dst_ptr += dst_stride;
|
||||||
y += dy;
|
y += dy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scale plane to/from any dimensions.
|
|
||||||
static void ScalePlaneAnySize(int src_width, int src_height,
|
|
||||||
int dst_width, int dst_height,
|
|
||||||
int src_stride, int dst_stride,
|
|
||||||
const uint8* src_ptr, uint8* dst_ptr,
|
|
||||||
FilterMode filtering) {
|
|
||||||
if (filtering == kFilterBox && src_width <= kMaxStride &&
|
|
||||||
dst_height * 2 < src_height ) {
|
|
||||||
ScalePlaneBox(src_width, src_height, dst_width, dst_height,
|
|
||||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (filtering && dst_height > src_height && dst_width <= kMaxStride) {
|
|
||||||
ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
|
|
||||||
src_stride, dst_stride, src_ptr, dst_ptr, filtering);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (filtering && src_width <= kMaxStride) {
|
|
||||||
ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
|
|
||||||
src_stride, dst_stride, src_ptr, dst_ptr, filtering);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
|
|
||||||
src_stride, dst_stride, src_ptr, dst_ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scale a plane.
|
// Scale a plane.
|
||||||
// This function in turn calls a scaling function suitable for handling
|
// This function in turn calls a scaling function suitable for handling
|
||||||
// the desired resolutions.
|
// the desired resolutions.
|
||||||
@ -2752,9 +2875,24 @@ void ScalePlane(const uint8* src, int src_stride,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Arbitrary scale up and/or down.
|
if (filtering == kFilterBox && src_width <= kMaxStride &&
|
||||||
ScalePlaneAnySize(src_width, src_height, dst_width, dst_height,
|
dst_height * 2 < src_height ) {
|
||||||
src_stride, dst_stride, src, dst, filtering);
|
ScalePlaneBox(src_width, src_height, dst_width, dst_height,
|
||||||
|
src_stride, dst_stride, src, dst);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (filtering && dst_height > src_height && dst_width <= kMaxStride) {
|
||||||
|
ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
|
||||||
|
src_stride, dst_stride, src, dst, filtering);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (filtering && src_width <= kMaxStride) {
|
||||||
|
ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
|
||||||
|
src_stride, dst_stride, src, dst, filtering);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
|
||||||
|
src_stride, dst_stride, src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scale an I420 image.
|
// Scale an I420 image.
|
||||||
|
|||||||
@ -401,6 +401,7 @@ static void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
|||||||
|
|
||||||
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
||||||
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
|
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
|
||||||
|
#define HAS_SCALEARGBCOLSUP2_SSE2
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||||
int dst_width, int /* x */, int /* dx */) {
|
int dst_width, int /* x */, int /* dx */) {
|
||||||
@ -735,6 +736,7 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
|
|||||||
|
|
||||||
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
||||||
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
|
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
|
||||||
|
#define HAS_SCALEARGBCOLSUP2_SSE2
|
||||||
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||||
int dst_width, int /* x */, int /* dx */) {
|
int dst_width, int /* x */, int /* dx */) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
@ -945,6 +947,38 @@ static void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scales a single row of pixels using point sampling.
|
||||||
|
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||||
|
int dst_width, int x, int dx) {
|
||||||
|
const uint32* src = reinterpret_cast<const uint32*>(src_argb);
|
||||||
|
uint32* dst = reinterpret_cast<uint32*>(dst_argb);
|
||||||
|
for (int j = 0; j < dst_width - 1; j += 2) {
|
||||||
|
dst[0] = src[x >> 16];
|
||||||
|
x += dx;
|
||||||
|
dst[1] = src[x >> 16];
|
||||||
|
x += dx;
|
||||||
|
dst += 2;
|
||||||
|
}
|
||||||
|
if (dst_width & 1) {
|
||||||
|
dst[0] = src[x >> 16];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scales a single row of pixels up by 2x using point sampling.
|
||||||
|
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
|
||||||
|
int dst_width, int, int) {
|
||||||
|
const uint32* src = reinterpret_cast<const uint32*>(src_argb);
|
||||||
|
uint32* dst = reinterpret_cast<uint32*>(dst_argb);
|
||||||
|
for (int j = 0; j < dst_width - 1; j += 2) {
|
||||||
|
dst[1] = dst[0] = src[0];
|
||||||
|
src += 1;
|
||||||
|
dst += 2;
|
||||||
|
}
|
||||||
|
if (dst_width & 1) {
|
||||||
|
dst[0] = src[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Mimics SSSE3 blender
|
// Mimics SSSE3 blender
|
||||||
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
|
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
|
||||||
#define BLENDERC(a, b, f, s) static_cast<uint32>( \
|
#define BLENDERC(a, b, f, s) static_cast<uint32>( \
|
||||||
@ -1151,7 +1185,7 @@ static void ScaleARGBBilinearDown(int src_height,
|
|||||||
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
const int max_y = (src_height - 1) << 16;
|
||||||
for (int j = 0; j < dst_height; ++j) {
|
for (int j = 0; j < dst_height; ++j) {
|
||||||
if (y > max_y) {
|
if (y > max_y) {
|
||||||
y = max_y;
|
y = max_y;
|
||||||
@ -1231,13 +1265,30 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||||
int dst_width, int x, int dx) = ScaleARGBFilterCols_C;
|
int dst_width, int x, int dx) =
|
||||||
|
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (filtering && TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||||
|
if (!filtering && TestCpuFlag(kCpuHasSSE2)) {
|
||||||
|
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||||
|
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
|
||||||
|
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
|
||||||
|
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||||
|
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||||
|
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||||
|
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
const int max_y = (src_height - 1) << 16;
|
||||||
if (y > max_y) {
|
if (y > max_y) {
|
||||||
y = max_y;
|
y = max_y;
|
||||||
}
|
}
|
||||||
@ -1258,7 +1309,11 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
|||||||
for (int j = 0; j < dst_height; ++j) {
|
for (int j = 0; j < dst_height; ++j) {
|
||||||
yi = y >> 16;
|
yi = y >> 16;
|
||||||
if (yi != lasty) {
|
if (yi != lasty) {
|
||||||
if (y <= max_y) {
|
if (y > max_y) {
|
||||||
|
y = max_y;
|
||||||
|
yi = y >> 16;
|
||||||
|
}
|
||||||
|
if (yi != lasty) {
|
||||||
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
|
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
|
||||||
rowptr += rowstride;
|
rowptr += rowstride;
|
||||||
rowstride = -rowstride;
|
rowstride = -rowstride;
|
||||||
@ -1394,7 +1449,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
|||||||
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
const int max_y = (src_height - 1) << 16;
|
||||||
if (y > max_y) {
|
if (y > max_y) {
|
||||||
y = max_y;
|
y = max_y;
|
||||||
}
|
}
|
||||||
@ -1430,7 +1485,11 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
|||||||
for (int j = 0; j < dst_height; ++j) {
|
for (int j = 0; j < dst_height; ++j) {
|
||||||
yi = y >> 16;
|
yi = y >> 16;
|
||||||
if (yi != lasty) {
|
if (yi != lasty) {
|
||||||
if (y <= max_y) {
|
if (y > max_y) {
|
||||||
|
y = max_y;
|
||||||
|
yi = y >> 16;
|
||||||
|
}
|
||||||
|
if (yi != lasty) {
|
||||||
// TODO(fbarchard): Convert the clipped region of row.
|
// TODO(fbarchard): Convert the clipped region of row.
|
||||||
I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
|
I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
|
||||||
ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
|
ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
|
||||||
@ -1456,26 +1515,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Scales a single row of pixels using point sampling.
|
// Scale ARGB to/from any dimensions, without interpolation.
|
||||||
// Code is adapted from libyuv bilinear yuv scaling, but with bilinear
|
|
||||||
// interpolation off, and argb pixels instead of yuv.
|
|
||||||
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
|
|
||||||
int dst_width, int x, int dx) {
|
|
||||||
const uint32* src = reinterpret_cast<const uint32*>(src_argb);
|
|
||||||
uint32* dst = reinterpret_cast<uint32*>(dst_argb);
|
|
||||||
for (int j = 0; j < dst_width - 1; j += 2) {
|
|
||||||
dst[0] = src[x >> 16];
|
|
||||||
x += dx;
|
|
||||||
dst[1] = src[x >> 16];
|
|
||||||
x += dx;
|
|
||||||
dst += 2;
|
|
||||||
}
|
|
||||||
if (dst_width & 1) {
|
|
||||||
dst[0] = src[x >> 16];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ScaleARGB ARGB to/from any dimensions, without interpolation.
|
|
||||||
// Fixed point math is used for performance: The upper 16 bits
|
// Fixed point math is used for performance: The upper 16 bits
|
||||||
// of x and dx is the integer part of the source position and
|
// of x and dx is the integer part of the source position and
|
||||||
// the lower 16 bits are the fixed decimal part.
|
// the lower 16 bits are the fixed decimal part.
|
||||||
@ -1490,14 +1530,18 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
|||||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ScaleARGBCols = ScaleARGBCols_SSE2;
|
ScaleARGBCols = ScaleARGBCols_SSE2;
|
||||||
if (src_width * 2 == dst_width && IS_ALIGNED(dst_width, 8) &&
|
}
|
||||||
(x >> 16) == 0 &&
|
#endif
|
||||||
|
if (src_width * 2 == dst_width && x < 0x8000) {
|
||||||
|
ScaleARGBCols = ScaleARGBColsUp2_C;
|
||||||
|
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
|
||||||
|
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||||
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
|
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < dst_height; ++i) {
|
for (int i = 0; i < dst_height; ++i) {
|
||||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
|
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
|
||||||
@ -1507,33 +1551,6 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScaleARGB ARGB to/from any dimensions.
|
|
||||||
static void ScaleARGBAnySize(int src_width, int src_height,
|
|
||||||
int dst_width, int dst_height,
|
|
||||||
int clip_width, int clip_height,
|
|
||||||
int src_stride, int dst_stride,
|
|
||||||
const uint8* src_argb, uint8* dst_argb,
|
|
||||||
int x, int dx, int y, int dy,
|
|
||||||
FilterMode filtering) {
|
|
||||||
if (filtering && dy < 65536 && dst_width * 4 <= kMaxStride) {
|
|
||||||
ScaleARGBBilinearUp(src_width, src_height,
|
|
||||||
clip_width, clip_height,
|
|
||||||
src_stride, dst_stride, src_argb, dst_argb,
|
|
||||||
x, dx, y, dy, filtering);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (filtering && src_width * 4 < kMaxStride) {
|
|
||||||
ScaleARGBBilinearDown(src_height,
|
|
||||||
clip_width, clip_height,
|
|
||||||
src_stride, dst_stride, src_argb, dst_argb,
|
|
||||||
x, dx, y, dy, filtering);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
|
|
||||||
src_stride, dst_stride, src_argb, dst_argb,
|
|
||||||
x, dx, y, dy);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scale an ARGB image.
|
// Scale an ARGB image.
|
||||||
// This function in turn calls a scaling function
|
// This function in turn calls a scaling function
|
||||||
// suitable for handling the desired resolutions.
|
// suitable for handling the desired resolutions.
|
||||||
@ -1631,13 +1648,23 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
|||||||
x, y, dy, 4, filtering);
|
x, y, dy, 4, filtering);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (filtering && dy < 65536 && dst_width * 4 <= kMaxStride) {
|
||||||
// Arbitrary scale up and/or down.
|
ScaleARGBBilinearUp(src_width, src_height,
|
||||||
ScaleARGBAnySize(src_width, src_height,
|
clip_width, clip_height,
|
||||||
dst_width, dst_height,
|
src_stride, dst_stride, src, dst,
|
||||||
clip_width, clip_height,
|
x, dx, y, dy, filtering);
|
||||||
src_stride, dst_stride, src, dst,
|
return;
|
||||||
x, dx, y, dy, filtering);
|
}
|
||||||
|
if (filtering && src_width * 4 < kMaxStride) {
|
||||||
|
ScaleARGBBilinearDown(src_height,
|
||||||
|
clip_width, clip_height,
|
||||||
|
src_stride, dst_stride, src, dst,
|
||||||
|
x, dx, y, dy, filtering);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
|
||||||
|
src_stride, dst_stride, src, dst,
|
||||||
|
x, dx, y, dy);
|
||||||
}
|
}
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user