odd width support for scale by even scale factor and box scale down by 4. scale down by 4 uses scale down by 2 internally.

BUG=431
TESTED=libyuvTest.ARGBScaleDownBy4_Bilinear

Review URL: https://webrtc-codereview.appspot.com/57399004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1412 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2015-05-26 17:56:51 +00:00
parent 80be2468b0
commit 7c09264ffc
5 changed files with 84 additions and 31 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1410 Version: 1412
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -313,9 +313,12 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// ARGB Row functions // ARGB Row functions
void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
@ -324,28 +327,12 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb, void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
@ -361,6 +348,32 @@ void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
// ScaleRowDown2Box also used by planar functions // ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation. // NEON downscalers with interpolation.

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1410 #define LIBYUV_VERSION 1412
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -140,10 +140,35 @@ SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON, SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
ScaleARGBRowDown2Box_C, 2, 4, 7) ScaleARGBRowDown2Box_C, 2, 4, 7)
#endif #endif
#undef SDANY #undef SDANY
// Fixed scale down. // Scale down by even scale factor.
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
uint8* dst_ptr, int dst_width) { \
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
int n = dst_width - r; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \
src_stepx, dst_ptr + n * BPP, r); \
}
#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
// Add rows box filter scale down.
#define SAANY(NAMEANY, SCALEADDROWS_SIMD, SCALEADDROWS_C, MASK) \ #define SAANY(NAMEANY, SCALEADDROWS_SIMD, SCALEADDROWS_C, MASK) \
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \ void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
uint16* dst_ptr, int src_width, int src_height) { \ uint16* dst_ptr, int src_width, int src_height) { \

View File

@ -107,15 +107,22 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
assert(dx == 65536 * 4); // Test scale factor of 4. assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2) #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
} }
}
#endif #endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON) #if defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
} }
}
#endif #endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
@ -146,16 +153,24 @@ static void ScaleARGBDownEven(int src_width, int src_height,
assert(IS_ALIGNED(src_height, 2)); assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4; src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
ScaleARGBRowDownEven_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2; ScaleARGBRowDownEven_SSE2;
} }
}
#endif #endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
ScaleARGBRowDownEven_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON; ScaleARGBRowDownEven_NEON;
} }
}
#endif #endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {