From 6dc80ab585de785ea3492ac3f8a66f2252edb9f8 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 3 Jan 2014 01:15:31 +0000 Subject: [PATCH] gargantuan width support on ARGBScale BUG=302 TEST=libyuv ARGBScale tests with LIBYUV_WIDTH=90000 R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/6519005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@944 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- include/libyuv/scale_row.h | 2 ++ source/scale_argb.cc | 41 ++++++++++++++++++++++++++++++-------- source/scale_common.cc | 29 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index e1b0333cd..e74448688 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -144,6 +144,8 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, int dst_width, int, int); void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx); +void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, + int dst_width, int x, int dx); void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width); diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 769c4a73f..6d688570e 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -162,7 +162,7 @@ static void ScaleARGBDownEven(int src_width, int src_height, } // Scale ARGB down with bilinear interpolation. -static void ScaleARGBBilinearDown(int src_height, +static void ScaleARGBBilinearDown(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, @@ -230,9 +230,10 @@ static void ScaleARGBBilinearDown(int src_height, } #endif void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = ScaleARGBFilterCols_C; + int dst_width, int x, int dx) = + (src_width >= 32768) ? 
ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { + if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif @@ -321,13 +322,16 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; + if (filtering && src_width >= 32768) { + ScaleARGBFilterCols = ScaleARGBFilterCols64_C; + } #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (filtering && TestCpuFlag(kCpuHasSSSE3)) { + if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif #if defined(HAS_SCALEARGBCOLS_SSE2) - if (!filtering && TestCpuFlag(kCpuHasSSE2)) { + if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBCols_SSE2; } #endif @@ -500,13 +504,34 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, InterpolateRow = InterpolateRow_MIPS_DSPR2; } #endif + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = ScaleARGBFilterCols_C; + int dst_width, int x, int dx) = + filtering ? 
ScaleARGBFilterCols_C : ScaleARGBCols_C; + if (filtering && src_width >= 32768) { + ScaleARGBFilterCols = ScaleARGBFilterCols64_C; + } #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { + if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif +#if defined(HAS_SCALEARGBCOLS_SSE2) + if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { + ScaleARGBFilterCols = ScaleARGBCols_SSE2; + } +#endif + if (!filtering && src_width * 2 == dst_width && x < 0x8000) { + ScaleARGBFilterCols = ScaleARGBColsUp2_C; +#if defined(HAS_SCALEARGBCOLSUP2_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; + } +#endif + } + const int max_y = (src_height - 1) << 16; if (y > max_y) { y = max_y; @@ -716,7 +741,7 @@ static void ScaleARGB(const uint8* src, int src_stride, return; } if (filtering) { - ScaleARGBBilinearDown(src_height, + ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); diff --git a/source/scale_common.cc b/source/scale_common.cc index 5c760b964..5ee289c93 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -477,6 +477,35 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, dst[0] = BLENDER(a, b, xf); } } + +void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, + int dst_width, int x32, int dx) { + int64 x = static_cast<int64>(x32); + const uint32* src = reinterpret_cast<const uint32*>(src_argb); + uint32* dst = reinterpret_cast<uint32*>(dst_argb); + for (int j = 0; j < dst_width - 1; j += 2) { + int64 xi = x >> 16; + int xf = (x >> 9) & 0x7f; + uint32 a = src[xi]; + uint32 b = src[xi + 
1]; + dst[1] = BLENDER(a, b, xf); + x += dx; + dst += 2; + } + if (dst_width & 1) { + int64 xi = x >> 16; + int xf = (x >> 9) & 0x7f; + uint32 a = src[xi]; + uint32 b = src[xi + 1]; + dst[0] = BLENDER(a, b, xf); + } +} #undef BLENDER1 #undef BLENDERC #undef BLENDER