Mirror of https://chromium.googlesource.com/libyuv/libyuv
Adjust the specialized 1/2-size scale-down paths to match the general-purpose code, which uses the odd pixel (rounded up, nearest neighbor).
BUG=223
TEST=out\Debug\convert.exe -f 0 faces_640x480_P420.yuv face2_320x240_P420.yuv
R=johannkoenig@google.com

Review URL: https://webrtc-codereview.appspot.com/1583005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@708 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 83408b85e4
commit 8b54a8f9f2
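In scalar terms, the change is simply which member of each source pixel pair a destination pixel copies. A minimal sketch of the new behavior (hypothetical helper name, not part of the libyuv API):

    #include <cstdint>

    // Nearest-neighbor 1/2-width downsample of one row.
    // The old specialized code copied the even pixel of each pair
    // (src[2 * x]); the general-purpose path rounds the sample position up,
    // i.e. takes the odd pixel (src[2 * x + 1]), and this commit makes the
    // fast paths match it.
    static void HalveRowOdd(const uint8_t* src, uint8_t* dst, int dst_width) {
      for (int x = 0; x < dst_width; ++x) {
        dst[x] = src[2 * x + 1];
      }
    }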
README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 707
+Version: 708
 License: BSD
 License File: LICENSE
 
include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 707
+#define LIBYUV_VERSION 708
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
source/scale.cc
@@ -196,16 +196,14 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                                            // src_stride ignored
     mov        edx, [esp + 12]        // dst_ptr
     mov        ecx, [esp + 16]        // dst_width
-    pcmpeqb    xmm5, xmm5             // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
 
     align      16
   wloop:
     movdqa     xmm0, [eax]
     movdqa     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    pand       xmm0, xmm5
-    pand       xmm1, xmm5
+    psrlw      xmm0, 8                // isolate odd pixels.
+    psrlw      xmm1, 8
     packuswb   xmm0, xmm1
     sub        ecx, 16
     movdqa     [edx], xmm0
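The mask setup becomes unnecessary because the odd byte can be selected by a shift alone: pand against 0x00ff kept the low (even) byte of each 16-bit pair, whereas psrlw by 8 moves the high (odd) byte into the low position; packuswb then packs the 16-bit lanes back to bytes either way. A rough intrinsics sketch of the new inner loop (an assumed equivalent, not the shipped asm):

    #include <emmintrin.h>  // SSE2
    #include <stdint.h>

    // Reduce 32 source bytes to the 16 odd-indexed ones.
    static void HalveRow16_SSE2(const uint8_t* src, uint8_t* dst) {
      __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
      __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + 16));
      a = _mm_srli_epi16(a, 8);              // odd byte of each pair -> low byte
      b = _mm_srli_epi16(b, 8);
      __m128i odd = _mm_packus_epi16(a, b);  // pack 16-bit lanes back to bytes
      _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), odd);
    }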
@@ -271,16 +269,14 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
                                            // src_stride ignored
     mov        edx, [esp + 12]        // dst_ptr
     mov        ecx, [esp + 16]        // dst_width
-    pcmpeqb    xmm5, xmm5             // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
 
     align      16
   wloop:
     movdqu     xmm0, [eax]
     movdqu     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    pand       xmm0, xmm5
-    pand       xmm1, xmm5
+    psrlw      xmm0, 8                // isolate odd pixels.
+    psrlw      xmm1, 8
     packuswb   xmm0, xmm1
     sub        ecx, 16
     movdqu     [edx], xmm0
@@ -1269,15 +1265,13 @@ static void ScaleFilterRows_Unaligned_SSSE3(uint8* dst_ptr,
 static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                                uint8* dst_ptr, int dst_width) {
   asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
     "packuswb  %%xmm1,%%xmm0                   \n"
     "movdqa    %%xmm0,(%1)                     \n"
     "lea       0x10(%1),%1                     \n"
@@ -1289,7 +1283,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
   :
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
+    , "xmm0", "xmm1"
 #endif
   );
 }
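Since xmm5 is no longer written, it also drops out of the clobber list here and in the unaligned variant below.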
@@ -1336,15 +1330,13 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
                                          ptrdiff_t src_stride,
                                          uint8* dst_ptr, int dst_width) {
   asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqu    (%0),%%xmm0                     \n"
     "movdqu    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
     "packuswb  %%xmm1,%%xmm0                   \n"
     "movdqu    %%xmm0,(%1)                     \n"
     "lea       0x10(%1),%1                     \n"
@@ -1356,7 +1348,7 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
   :
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
+    , "xmm0", "xmm1"
 #endif
   );
 }
@@ -2324,13 +2316,13 @@ static void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
                             uint8* dst, int dst_width) {
   uint8* dend = dst + dst_width - 1;
   do {
-    dst[0] = src_ptr[0];
-    dst[1] = src_ptr[2];
+    dst[0] = src_ptr[1];
+    dst[1] = src_ptr[3];
     dst += 2;
     src_ptr += 4;
   } while (dst < dend);
   if (dst_width & 1) {
-    dst[0] = src_ptr[0];
+    dst[0] = src_ptr[1];
   }
 }
 
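A quick trace of the C path for dst_width = 5: the loop writes source pixels 1, 3, 5 and 7, and the odd-width tail then writes pixel 9 — every output is the odd (rounded-up) member of its source pair, matching the SIMD paths above.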
@@ -2689,6 +2681,7 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
   }
 #endif
 
+  src_ptr += src_stride;  // Point to odd rows.
   // TODO(fbarchard): Loop through source height to allow odd height.
   for (int y = 0; y < dst_height; ++y) {
     ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
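The same convention is applied vertically: advancing src_ptr by one stride before the row loop makes each output row read the odd (second) row of its source pair, the vertical analogue of the odd-column change.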
source/scale_argb.cc
@@ -62,7 +62,7 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
     movdqa     xmm0, [eax]
     movdqa     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    shufps     xmm0, xmm1, 0x88
+    shufps     xmm0, xmm1, 0xdd
     sub        ecx, 4
     movdqa     [edx], xmm0
     lea        edx, [edx + 16]
@@ -350,7 +350,7 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm0             \n"
     "sub       $0x4,%2                         \n"
     "movdqa    %%xmm0,(%1)                     \n"
     "lea       0x10(%1),%1                     \n"
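The shufps immediate encodes four 2-bit lane selectors. 0x88 (binary 10 00 10 00) picks 32-bit lanes {0, 2} from each source register, i.e. the even ARGB pixels; 0xdd (11 01 11 01) picks lanes {1, 3}, the odd pixels. In intrinsics form (a sketch of the same selection, not the shipped code):

    #include <xmmintrin.h>  // SSE

    // Keep the odd ARGB pixels out of 8 inputs held in two XMM registers.
    // _MM_SHUFFLE(3, 1, 3, 1) == 0xdd; the old even-pixel code used
    // _MM_SHUFFLE(2, 0, 2, 0) == 0x88.
    static __m128 OddARGBPixels(__m128 lo, __m128 hi) {
      return _mm_shuffle_ps(lo, hi, _MM_SHUFFLE(3, 1, 3, 1));
    }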
@@ -634,13 +634,13 @@ static void ScaleARGBRowDown2_C(const uint8* src_argb,
   uint32* dst = reinterpret_cast<uint32*>(dst_argb);
 
   for (int x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = src[0];
-    dst[1] = src[2];
+    dst[0] = src[1];
+    dst[1] = src[3];
     src += 4;
     dst += 2;
   }
   if (dst_width & 1) {
-    dst[0] = src[0];
+    dst[0] = src[1];
   }
 }
 
@@ -743,25 +743,26 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
                            FilterMode filtering) {
   assert(dx == 65536 * 2);      // Test scale factor of 2.
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
+  // Advance to odd row / even column.
+  src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
+  int row_stride = src_stride * (dy >> 16);
   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
                             uint8* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C;
 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
     ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 :
         ScaleARGBRowDown2_SSE2;
   }
 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
-      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
     ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_NEON :
         ScaleARGBRowDown2_NEON;
   }
 #endif
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-  int row_stride = src_stride * (dy >> 16);
 
   // TODO(fbarchard): Loop through source height to allow odd height.
   for (int y = 0; y < dst_height; ++y) {
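Two things happen in this hunk besides the column shift: the pointer advance and row_stride computation move above the dispatch block, because the SSE2/NEON checks now test IS_ALIGNED(row_stride, ...) instead of src_stride; and the base pointer backs up one ARGB pixel (((x >> 16) - 1) * 4), apparently so the shufps rows, which emit odd pixels relative to the base, land on the intended columns.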
@@ -782,6 +783,9 @@ static void ScaleARGBDownEven(int src_width, int src_height,
                               FilterMode filtering) {
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
+  int col_step = dx >> 16;
+  int row_stride = (dy >> 16) * src_stride;
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
   void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
                                int src_step, uint8* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDownEvenInt_C : ScaleARGBRowDownEven_C;
@@ -798,9 +802,6 @@ static void ScaleARGBDownEven(int src_width, int src_height,
         ScaleARGBRowDownEven_NEON;
   }
 #endif
-  int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * src_stride;
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
 
   for (int y = 0; y < dst_height; ++y) {
     ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
source/scale_argb_neon.cc
@@ -27,8 +27,8 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
     "vld2.u32   {q0, q1}, [%0]!                \n"
     "vld2.u32   {q2, q3}, [%0]!                \n"
     "subs       %2, %2, #8                     \n"  // 8 processed per loop
-    "vst1.u8    {q0}, [%1]!                    \n"  // store even pixels
-    "vst1.u8    {q2}, [%1]!                    \n"
+    "vst1.u8    {q1}, [%1]!                    \n"  // store odd pixels
+    "vst1.u8    {q3}, [%1]!                    \n"
     "bgt        1b                             \n"
   : "+r"(src_ptr),          // %0
     "+r"(dst),              // %1
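vld2.u32 de-interleaves as it loads, so q0/q2 receive the even 32-bit pixels and q1/q3 the odd ones; switching the stores from q0/q2 to q1/q3 is the whole change. An intrinsics sketch of the idea (an assumed equivalent, not the shipped asm):

    #include <arm_neon.h>
    #include <stdint.h>

    // Halve 8 ARGB pixels to 4 by keeping the odd-indexed ones.
    static void HalveARGB8_NEON(const uint32_t* src, uint32_t* dst) {
      uint32x4x2_t p = vld2q_u32(src);  // de-interleave: val[0]=even, val[1]=odd
      vst1q_u32(dst, p.val[1]);         // store the 4 odd pixels
    }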
@@ -78,6 +78,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t,
                                int src_stepx,
                                uint8* dst_argb, int dst_width) {
   asm volatile (
+    "add        %0, #4                         \n"  // point to odd pixels.
     "mov        r12, %3, lsl #2                \n"
     ".p2align   2                              \n"
   "1:                                          \n"
source/scale_mips.cc
@@ -39,6 +39,7 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
     "lw         $t5, 20(%[src_ptr])            \n"  // |23|22|21|20|
     "lw         $t6, 24(%[src_ptr])            \n"  // |27|26|25|24|
     "lw         $t7, 28(%[src_ptr])            \n"  // |31|30|29|28|
+    // TODO(fbarchard): Use odd pixels instead of even.
     "precr.qb.ph $t8, $t1, $t0                 \n"  // |6|4|2|0|
     "precr.qb.ph $t0, $t3, $t2                 \n"  // |14|12|10|8|
     "precr.qb.ph $t1, $t5, $t4                 \n"  // |22|20|18|16|
source/scale_neon.cc
@@ -29,7 +29,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
     // load even pixels into q0, odd into q1
     "vld2.u8    {q0,q1}, [%0]!                 \n"
     "subs       %2, %2, #16                    \n"  // 16 processed per loop
-    "vst1.u8    {q0}, [%1]!                    \n"  // store even pixels
+    "vst1.u8    {q1}, [%1]!                    \n"  // store odd pixels
     "bgt        1b                             \n"
   : "+r"(src_ptr),          // %0
     "+r"(dst),              // %1