diff --git a/README.chromium b/README.chromium index 69f1b44e3..3fef840d8 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1410 +Version: 1412 License: BSD License File: LICENSE diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 5cf0cbe48..b78a56bce 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -313,9 +313,12 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx); void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx); - +void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, + int dst_width, int x, int dx); void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx); +void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, + int dst_width, int x, int dx); // ARGB Row functions void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride, @@ -324,28 +327,12 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width); void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width); void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); - void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width); void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb, @@ -361,6 +348,32 @@ void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb, void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width); +void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, + int src_stepx, uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); +void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8* dst_argb, int dst_width); + // ScaleRowDown2Box also used by planar functions // NEON downscalers with interpolation. diff --git a/include/libyuv/version.h b/include/libyuv/version.h index e2c4b48c9..34b4f7863 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1410 +#define LIBYUV_VERSION 1412 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_any.cc b/source/scale_any.cc index 90ea61c77..b3f2ecf2c 100644 --- a/source/scale_any.cc +++ b/source/scale_any.cc @@ -140,10 +140,35 @@ SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON, SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON, ScaleARGBRowDown2Box_C, 2, 4, 7) #endif - #undef SDANY -// Fixed scale down. +// Scale down by even scale factor. +#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ + void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \ + uint8* dst_ptr, int dst_width) { \ + int r = (int)((unsigned int)dst_width % (MASK + 1)); \ + int n = dst_width - r; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \ + src_stepx, dst_ptr + n * BPP, r); \ + } + +#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 +SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2, + ScaleARGBRowDownEven_C, 4, 3) +SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2, + ScaleARGBRowDownEvenBox_C, 4, 3) +#endif +#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON +SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON, + ScaleARGBRowDownEven_C, 4, 3) +SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON, + ScaleARGBRowDownEvenBox_C, 4, 3) +#endif + +// Add rows box filter scale down. #define SAANY(NAMEANY, SCALEADDROWS_SIMD, SCALEADDROWS_C, MASK) \ void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \ uint16* dst_ptr, int src_width, int src_height) { \ diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 79b5fa618..40a2d1ab2 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -107,15 +107,22 @@ static void ScaleARGBDown4Box(int src_width, int src_height, assert(dx == 65536 * 4); // Test scale factor of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. #if defined(HAS_SCALEARGBROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; + } } #endif #if defined(HAS_SCALEARGBROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { - ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; + if (TestCpuFlag(kCpuHasNEON)) { + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON; + if (IS_ALIGNED(dst_width, 8)) { + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; + } } #endif + for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, @@ -146,15 +153,23 @@ static void ScaleARGBDownEven(int src_width, int src_height, assert(IS_ALIGNED(src_height, 2)); src_argb += (y >> 16) * src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : - ScaleARGBRowDownEven_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 : + ScaleARGBRowDownEven_Any_SSE2; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : + ScaleARGBRowDownEven_SSE2; + } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : - ScaleARGBRowDownEven_NEON; + if (TestCpuFlag(kCpuHasNEON)) { + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON : + ScaleARGBRowDownEven_Any_NEON; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : + ScaleARGBRowDownEven_NEON; + } } #endif