diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index d5ff33a1c..9763355d1 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -210,6 +210,7 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx); +// Specialized scalers for x86. void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, @@ -254,6 +255,11 @@ void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); +void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); + void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint16* dst_ptr, int src_width, int src_height); @@ -345,6 +351,10 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint16* dst_ptr, int src_width, int src_height); diff --git a/source/scale.cc b/source/scale.cc index f1822e762..963599075 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -174,13 +174,21 @@ static void ScalePlaneDown4(int src_width, int src_height, src_stride = 0; } #if defined(HAS_SCALEROWDOWN4_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; + if (TestCpuFlag(kCpuHasNEON)) { + ScaleRowDown4 = filtering ? + ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; + if (IS_ALIGNED(dst_width, 8)) { + ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; + } } #endif #if defined(HAS_SCALEROWDOWN4_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleRowDown4 = filtering ? + ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2; + if (IS_ALIGNED(dst_width, 8)) { + ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; + } } #endif #if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) diff --git a/source/scale_any.cc b/source/scale_any.cc index 9099efaaa..6e8917f02 100644 --- a/source/scale_any.cc +++ b/source/scale_any.cc @@ -72,7 +72,16 @@ SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON, SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON, ScaleRowDown2Box_C, 2, 1, 15) #endif - +#ifdef HAS_SCALEROWDOWN4_SSE2 +SDANY(ScaleRowDown4_Any_SSE2, ScaleRowDown4_SSE2, ScaleRowDown4_C, 4, 1, 7) +SDANY(ScaleRowDown4Box_Any_SSE2, ScaleRowDown4Box_SSE2, ScaleRowDown4Box_C, + 4, 1, 7) +#endif +#ifdef HAS_SCALEROWDOWN4_NEON +SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) +SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C, + 4, 1, 7) +#endif #undef SDANY #ifdef __cplusplus