mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Handle scaling down by a factor of 2 efficiently by calling SIMD for a multiple of 16 destination pixels, and C for the remainder.
BUG=314
TESTED=out\release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*.ScaleDownBy2*
R=bcornell@google.com
Review URL: https://webrtc-codereview.appspot.com/48689004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1344 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
0d3bfab6db
commit
d41fbf40dd
@ -222,6 +222,12 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
|||||||
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width);
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
|
void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width);
|
||||||
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16* dst_ptr, int src_width,
|
uint16* dst_ptr, int src_width,
|
||||||
int src_height);
|
int src_height);
|
||||||
@ -271,10 +277,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
// Note - not static due to reuse in convert for 444 to 420.
|
// Note - not static due to reuse in convert for 444 to 420.
|
||||||
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width);
|
uint8* dst, int dst_width);
|
||||||
|
|
||||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width);
|
uint8* dst, int dst_width);
|
||||||
|
|
||||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst, int dst_width);
|
uint8* dst, int dst_width);
|
||||||
|
|
||||||
@ -309,6 +313,13 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
|||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width);
|
uint8* dst_ptr, int dst_width);
|
||||||
|
|
||||||
|
void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst, int dst_width);
|
||||||
|
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst, int dst_width);
|
||||||
|
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst, int dst_width);
|
||||||
|
|
||||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16* dst_ptr, int src_width, int src_height);
|
uint16* dst_ptr, int src_width, int src_height);
|
||||||
|
|
||||||
|
|||||||
@ -54,18 +54,28 @@ static void ScalePlaneDown2(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWDOWN2_NEON)
|
#if defined(HAS_SCALEROWDOWN2_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
|
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
|
||||||
|
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
|
||||||
|
ScaleRowDown2Box_Any_NEON);
|
||||||
|
if (IS_ALIGNED(dst_width, 16)) {
|
||||||
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
|
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
|
||||||
(filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
|
(filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
|
||||||
ScaleRowDown2Box_NEON);
|
ScaleRowDown2Box_NEON);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWDOWN2_SSE2)
|
#if defined(HAS_SCALEROWDOWN2_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
|
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
|
||||||
|
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
|
||||||
|
ScaleRowDown2Box_Any_SSE2);
|
||||||
|
if (IS_ALIGNED(dst_width, 16)) {
|
||||||
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
|
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
|
||||||
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
|
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
|
||||||
ScaleRowDown2Box_SSE2);
|
ScaleRowDown2Box_SSE2);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
|
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
|
||||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
|
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
|
||||||
|
|||||||
@ -35,6 +35,36 @@ CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
|
|||||||
#endif
|
#endif
|
||||||
#undef CANY
|
#undef CANY
|
||||||
|
|
||||||
|
// Fixed scale down. SIMD handles the largest multiple of (MASK + 1) destination pixels; C handles the remainder.
|
||||||
|
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||||
|
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
|
||||||
|
uint8* dst_ptr, int dst_width) { \
|
||||||
|
int n = dst_width & ~MASK; \
|
||||||
|
if (n > 0) { \
|
||||||
|
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||||
|
} \
|
||||||
|
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||||
|
dst_ptr + n * BPP, dst_width & MASK); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAS_SCALEROWDOWN2_SSE2
|
||||||
|
SDANY(ScaleRowDown2_Any_SSE2, ScaleRowDown2_SSE2, ScaleRowDown2_C, 2, 1, 15)
|
||||||
|
SDANY(ScaleRowDown2Linear_Any_SSE2, ScaleRowDown2Linear_SSE2,
|
||||||
|
ScaleRowDown2Linear_C, 2, 1, 15)
|
||||||
|
SDANY(ScaleRowDown2Box_Any_SSE2, ScaleRowDown2Box_SSE2,
|
||||||
|
ScaleRowDown2Box_C, 2, 1, 15)
|
||||||
|
#endif
|
||||||
|
#ifdef HAS_SCALEROWDOWN2_NEON
|
||||||
|
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
|
||||||
|
SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
|
||||||
|
ScaleRowDown2Linear_C, 2, 1, 15)
|
||||||
|
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
|
||||||
|
ScaleRowDown2Box_C, 2, 1, 15)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef SDANY
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -666,7 +666,6 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Bilinear column filtering. SSSE3 version.
|
// Bilinear column filtering. SSSE3 version.
|
||||||
// TODO(fbarchard): Port to Neon
|
|
||||||
// TODO(fbarchard): Switch the following:
|
// TODO(fbarchard): Switch the following:
|
||||||
// xor ebx, ebx
|
// xor ebx, ebx
|
||||||
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
|
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user