mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Scale to 3/4 or 3/8 with odd-width destinations efficiently. Previously, if the width was not a multiple of what the SIMD loop processes (24), scaling would fall back on slower C code. This change allows SIMD to be used for most of the scaling and C for the remainder, improving efficiency.
BUG=314 TESTED=set LIBYUV_WIDTH=1896 & ScaleDownBy3by4_* R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/48249004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1380 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
1ffb04b43e
commit
4e78b8dc2e
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1378
|
Version: 1379
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -261,6 +261,22 @@ void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
uint8* dst_ptr, int dst_width);
|
uint8* dst_ptr, int dst_width);
|
||||||
void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width);
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
|
||||||
|
ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
|
||||||
|
ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
|
||||||
|
ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
|
||||||
|
ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
|
||||||
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16* dst_ptr, int src_width, int src_height);
|
uint16* dst_ptr, int src_width, int src_height);
|
||||||
@ -358,6 +374,21 @@ void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
uint8* dst_ptr, int dst_width);
|
uint8* dst_ptr, int dst_width);
|
||||||
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width);
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
// 32 -> 12
|
||||||
|
void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
// 32x3 -> 12x1
|
||||||
|
void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
// 32x2 -> 12x1
|
||||||
|
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
|
||||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16* dst_ptr, int src_width, int src_height);
|
uint16* dst_ptr, int src_width, int src_height);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1378
|
#define LIBYUV_VERSION 1379
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -277,7 +277,15 @@ static void ScalePlaneDown34(int src_width, int src_height,
|
|||||||
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
|
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
|
||||||
}
|
}
|
||||||
#if defined(HAS_SCALEROWDOWN34_NEON)
|
#if defined(HAS_SCALEROWDOWN34_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
|
if (!filtering) {
|
||||||
|
ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
|
||||||
|
ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
|
||||||
|
} else {
|
||||||
|
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
|
||||||
|
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
|
||||||
|
}
|
||||||
|
if (dst_width % 24 == 0) {
|
||||||
if (!filtering) {
|
if (!filtering) {
|
||||||
ScaleRowDown34_0 = ScaleRowDown34_NEON;
|
ScaleRowDown34_0 = ScaleRowDown34_NEON;
|
||||||
ScaleRowDown34_1 = ScaleRowDown34_NEON;
|
ScaleRowDown34_1 = ScaleRowDown34_NEON;
|
||||||
@ -286,9 +294,18 @@ static void ScalePlaneDown34(int src_width, int src_height,
|
|||||||
ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
|
ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWDOWN34_SSSE3)
|
#if defined(HAS_SCALEROWDOWN34_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
|
if (!filtering) {
|
||||||
|
ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
|
||||||
|
ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
|
||||||
|
} else {
|
||||||
|
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
|
||||||
|
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
|
||||||
|
}
|
||||||
|
if (dst_width % 24 == 0) {
|
||||||
if (!filtering) {
|
if (!filtering) {
|
||||||
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
|
ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
|
||||||
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
|
ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
|
||||||
@ -297,6 +314,7 @@ static void ScalePlaneDown34(int src_width, int src_height,
|
|||||||
ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
|
ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
|
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
|
||||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
|
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
|
||||||
@ -450,8 +468,17 @@ static void ScalePlaneDown38(int src_width, int src_height,
|
|||||||
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
|
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
|
||||||
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
|
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWDOWN38_NEON)
|
#if defined(HAS_SCALEROWDOWN38_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
|
if (!filtering) {
|
||||||
|
ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
|
||||||
|
ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
|
||||||
|
} else {
|
||||||
|
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
|
||||||
|
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
|
||||||
|
}
|
||||||
|
if (dst_width % 12 == 0) {
|
||||||
if (!filtering) {
|
if (!filtering) {
|
||||||
ScaleRowDown38_3 = ScaleRowDown38_NEON;
|
ScaleRowDown38_3 = ScaleRowDown38_NEON;
|
||||||
ScaleRowDown38_2 = ScaleRowDown38_NEON;
|
ScaleRowDown38_2 = ScaleRowDown38_NEON;
|
||||||
@ -460,13 +487,22 @@ static void ScalePlaneDown38(int src_width, int src_height,
|
|||||||
ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
|
ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWDOWN38_SSSE3)
|
#if defined(HAS_SCALEROWDOWN38_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
if (!filtering) {
|
if (!filtering) {
|
||||||
|
ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
|
||||||
|
ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
|
||||||
|
} else {
|
||||||
|
ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
|
||||||
|
ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
|
||||||
|
}
|
||||||
|
if (dst_width % 12 == 0 && !filtering) {
|
||||||
ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
|
ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
|
||||||
ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
|
ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
|
||||||
} else {
|
}
|
||||||
|
if (dst_width % 6 == 0 && filtering) {
|
||||||
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
|
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
|
||||||
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
|
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -87,6 +87,40 @@ SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
|
|||||||
SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
|
SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
|
||||||
4, 1, 7)
|
4, 1, 7)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAS_SCALEROWDOWN34_SSSE3
|
||||||
|
SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3, ScaleRowDown34_C,
|
||||||
|
3 / 4, 1, 23)
|
||||||
|
SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
|
||||||
|
ScaleRowDown34_0_Box_C, 3 / 4, 1, 23)
|
||||||
|
SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
|
||||||
|
ScaleRowDown34_1_Box_C, 3 / 4, 1, 23)
|
||||||
|
#endif
|
||||||
|
#ifdef HAS_SCALEROWDOWN34_NEON
|
||||||
|
SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON, ScaleRowDown34_C,
|
||||||
|
3 / 4, 1, 23)
|
||||||
|
SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
|
||||||
|
ScaleRowDown34_0_Box_C, 3 / 4, 1, 23)
|
||||||
|
SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
|
||||||
|
ScaleRowDown34_1_Box_C, 3 / 4, 1, 23)
|
||||||
|
#endif
|
||||||
|
#ifdef HAS_SCALEROWDOWN38_SSSE3
|
||||||
|
SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3, ScaleRowDown38_C,
|
||||||
|
3 / 4, 1, 11)
|
||||||
|
SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
|
||||||
|
ScaleRowDown38_3_Box_C, 3 / 4, 1, 5)
|
||||||
|
SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
|
||||||
|
ScaleRowDown38_2_Box_C, 3 / 4, 1, 5)
|
||||||
|
#endif
|
||||||
|
#ifdef HAS_SCALEROWDOWN38_NEON
|
||||||
|
SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON, ScaleRowDown38_C,
|
||||||
|
3 / 4, 1, 11)
|
||||||
|
SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
|
||||||
|
ScaleRowDown38_3_Box_C, 3 / 4, 1, 11)
|
||||||
|
SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
|
||||||
|
ScaleRowDown38_2_Box_C, 3 / 4, 1, 11)
|
||||||
|
#endif
|
||||||
#undef SDANY
|
#undef SDANY
|
||||||
|
|
||||||
// Fixed scale down.
|
// Fixed scale down.
|
||||||
|
|||||||
@ -400,7 +400,6 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
|
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
|
||||||
// Then shuffled to do the scaling.
|
// Then shuffled to do the scaling.
|
||||||
|
|
||||||
// Note that movdqa+palign may be better than movdqu.
|
|
||||||
__declspec(naked)
|
__declspec(naked)
|
||||||
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user