mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Adjust the specialized 1/2-size scale-down code to match the general-purpose scaler, which samples the odd pixel (rounded up, nearest neighbor).
BUG=223
TEST=out\Debug\convert.exe -f 0 faces_640x480_P420.yuv face2_320x240_P420.yuv
R=johannkoenig@google.com
Review URL: https://webrtc-codereview.appspot.com/1583005
git-svn-id: http://libyuv.googlecode.com/svn/trunk@708 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
83408b85e4
commit
8b54a8f9f2
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 707
|
||||
Version: 708
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 707
|
||||
#define LIBYUV_VERSION 708
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -196,16 +196,14 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
// src_stride ignored
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||
psrlw xmm5, 8
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
pand xmm0, xmm5
|
||||
pand xmm1, xmm5
|
||||
psrlw xmm0, 8 // isolate odd pixels.
|
||||
psrlw xmm1, 8
|
||||
packuswb xmm0, xmm1
|
||||
sub ecx, 16
|
||||
movdqa [edx], xmm0
|
||||
@ -271,16 +269,14 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
|
||||
// src_stride ignored
|
||||
mov edx, [esp + 12] // dst_ptr
|
||||
mov ecx, [esp + 16] // dst_width
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||
psrlw xmm5, 8
|
||||
|
||||
align 16
|
||||
wloop:
|
||||
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
pand xmm0, xmm5
|
||||
pand xmm1, xmm5
|
||||
psrlw xmm0, 8 // isolate odd pixels.
|
||||
psrlw xmm1, 8
|
||||
packuswb xmm0, xmm1
|
||||
sub ecx, 16
|
||||
movdqu [edx], xmm0
|
||||
@ -1269,15 +1265,13 @@ static void ScaleFilterRows_Unaligned_SSSE3(uint8* dst_ptr,
|
||||
static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrlw $0x8,%%xmm5 \n"
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pand %%xmm5,%%xmm0 \n"
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
@ -1289,7 +1283,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm5"
|
||||
, "xmm0", "xmm1"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
@ -1336,15 +1330,13 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrlw $0x8,%%xmm5 \n"
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu 0x10(%0),%%xmm1 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"pand %%xmm5,%%xmm0 \n"
|
||||
"pand %%xmm5,%%xmm1 \n"
|
||||
"psrlw $0x8,%%xmm0 \n"
|
||||
"psrlw $0x8,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
@ -1356,7 +1348,7 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm5"
|
||||
, "xmm0", "xmm1"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
@ -2324,13 +2316,13 @@ static void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
||||
uint8* dst, int dst_width) {
|
||||
uint8* dend = dst + dst_width - 1;
|
||||
do {
|
||||
dst[0] = src_ptr[0];
|
||||
dst[1] = src_ptr[2];
|
||||
dst[0] = src_ptr[1];
|
||||
dst[1] = src_ptr[3];
|
||||
dst += 2;
|
||||
src_ptr += 4;
|
||||
} while (dst < dend);
|
||||
if (dst_width & 1) {
|
||||
dst[0] = src_ptr[0];
|
||||
dst[0] = src_ptr[1];
|
||||
}
|
||||
}
|
||||
|
||||
@ -2689,6 +2681,7 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
||||
}
|
||||
#endif
|
||||
|
||||
src_ptr += src_stride; // Point to odd rows.
|
||||
// TODO(fbarchard): Loop through source height to allow odd height.
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
|
||||
|
||||
@ -62,7 +62,7 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
shufps xmm0, xmm1, 0x88
|
||||
shufps xmm0, xmm1, 0xdd
|
||||
sub ecx, 4
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
@ -350,7 +350,7 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"shufps $0x88,%%xmm1,%%xmm0 \n"
|
||||
"shufps $0xdd,%%xmm1,%%xmm0 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"movdqa %%xmm0,(%1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
@ -634,13 +634,13 @@ static void ScaleARGBRowDown2_C(const uint8* src_argb,
|
||||
uint32* dst = reinterpret_cast<uint32*>(dst_argb);
|
||||
|
||||
for (int x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[2];
|
||||
dst[0] = src[1];
|
||||
dst[1] = src[3];
|
||||
src += 4;
|
||||
dst += 2;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = src[0];
|
||||
dst[0] = src[1];
|
||||
}
|
||||
}
|
||||
|
||||
@ -743,25 +743,26 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
|
||||
FilterMode filtering) {
|
||||
assert(dx == 65536 * 2); // Test scale factor of 2.
|
||||
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
|
||||
// Advance to odd row / even column.
|
||||
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) =
|
||||
filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C;
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 :
|
||||
ScaleARGBRowDown2_SSE2;
|
||||
}
|
||||
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
|
||||
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_NEON :
|
||||
ScaleARGBRowDown2_NEON;
|
||||
}
|
||||
#endif
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
|
||||
// TODO(fbarchard): Loop through source height to allow odd height.
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
@ -782,6 +783,9 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
FilterMode filtering) {
|
||||
assert(IS_ALIGNED(src_width, 2));
|
||||
assert(IS_ALIGNED(src_height, 2));
|
||||
int col_step = dx >> 16;
|
||||
int row_stride = (dy >> 16) * src_stride;
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_step, uint8* dst_argb, int dst_width) =
|
||||
filtering ? ScaleARGBRowDownEvenInt_C : ScaleARGBRowDownEven_C;
|
||||
@ -798,9 +802,6 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
ScaleARGBRowDownEven_NEON;
|
||||
}
|
||||
#endif
|
||||
int col_step = dx >> 16;
|
||||
int row_stride = (dy >> 16) * src_stride;
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
|
||||
for (int y = 0; y < dst_height; ++y) {
|
||||
ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
|
||||
|
||||
@ -27,8 +27,8 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
||||
"vld2.u32 {q0, q1}, [%0]! \n"
|
||||
"vld2.u32 {q2, q3}, [%0]! \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
||||
"vst1.u8 {q0}, [%1]! \n" // store even pixels
|
||||
"vst1.u8 {q2}, [%1]! \n"
|
||||
"vst1.u8 {q1}, [%1]! \n" // store odd pixels
|
||||
"vst1.u8 {q3}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst), // %1
|
||||
@ -78,6 +78,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
asm volatile (
|
||||
"add %0, #4 \n" // point to odd pixels.
|
||||
"mov r12, %3, lsl #2 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
|
||||
@ -39,6 +39,7 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
||||
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
// TODO(fbarchard): Use odd pixels instead of even.
|
||||
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
|
||||
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
|
||||
|
||||
@ -29,7 +29,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
||||
// load even pixels into q0, odd into q1
|
||||
"vld2.u8 {q0,q1}, [%0]! \n"
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop
|
||||
"vst1.u8 {q0}, [%1]! \n" // store even pixels
|
||||
"vst1.u8 {q1}, [%1]! \n" // store odd pixels
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst), // %1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user