From e86abbd2441ac05fd331ccf91da40273d92853c2 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 27 Dec 2013 01:11:26 +0000 Subject: [PATCH] Use malloc for row buffers in scalers removing size limitations. BUG=296 TESTED=libyuvTest.Scale* R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/6369004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@925 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/scale.cc | 46 +++++++++++++++++++++--------------- source/scale_argb.cc | 51 ++++++++++++++++++++++++++-------------- 4 files changed, 62 insertions(+), 39 deletions(-) diff --git a/README.chromium b/README.chromium index 5fb25d1b6..121c4394a 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 924 +Version: 925 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index e5e853242..e67e21810 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 924 +#define LIBYUV_VERSION 925 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale.cc b/source/scale.cc index 36bc9c084..9fcd5cc43 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -391,8 +391,8 @@ static void ScalePlaneBox(int src_width, int src_height, ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, &dx, &dy); const int max_y = (src_height << 16); - if (!IS_ALIGNED(src_width, 16) || (src_width > kMaxStride) || - dst_height * 2 > src_height) { + // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. 
+ if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { uint8* dst = dst_ptr; for (int j = 0; j < dst_height; ++j) { int iy = y >> 16; @@ -409,13 +409,14 @@ static void ScalePlaneBox(int src_width, int src_height, } return; } - // TODO(fbarchard): Remove kMaxStride limitation. - SIMD_ALIGNED(uint16 row[kMaxStride]); - void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; + // Allocate a row buffer of uint16. + align_buffer_64(row16, src_width * 2); + void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint16* src_ptr, uint8* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C; + void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride, + uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; #if defined(HAS_SCALEADDROWS_SSE2) if (TestCpuFlag(kCpuHasSSE2) && #ifdef AVOID_OVERREAD @@ -434,11 +435,13 @@ static void ScalePlaneBox(int src_width, int src_height, y = (src_height << 16); } int boxheight = (y >> 16) - iy; - ScaleAddRows(src, src_stride, row, src_width, boxheight); - ScaleAddCols(dst_width, boxheight, x, dx, row, dst_ptr); + ScaleAddRows(src, src_stride, reinterpret_cast<uint16*>(row16), + src_width, boxheight); + ScaleAddCols(dst_width, boxheight, x, dx, reinterpret_cast<uint16*>(row16), + dst_ptr); dst_ptr += dst_stride; } - + free_aligned_buffer_64(row16); } // Scale plane down with bilinear interpolation. @@ -450,7 +453,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height, FilterMode filtering) { assert(dst_width > 0); assert(dst_height > 0); - assert(Abs(src_width) <= kMaxStride); // Initial source x/y coordinate and step values as 16.16 fixed point. 
int x = 0; @@ -460,8 +462,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); - SIMD_ALIGNED(uint8 row[kMaxStride + 16]); - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -520,6 +520,10 @@ void ScalePlaneBilinearDown(int src_width, int src_height, } #endif + // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. + // Allocate a row buffer. + align_buffer_64(row, src_width); + const int max_y = (src_height - 1) << 16; for (int j = 0; j < dst_height; ++j) { if (y > max_y) { @@ -537,6 +541,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height, dst_ptr += dst_stride; y += dy; } + free_aligned_buffer_64(row); } // Scale up down with bilinear interpolation. @@ -550,7 +555,6 @@ void ScalePlaneBilinearUp(int src_width, int src_height, assert(src_height != 0); assert(dst_width > 0); assert(dst_height > 0); - assert(Abs(dst_width) <= kMaxStride); // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -635,9 +639,13 @@ void ScalePlaneBilinearUp(int src_width, int src_height, } int yi = y >> 16; const uint8* src = src_ptr + yi * src_stride; - SIMD_ALIGNED(uint8 row[2 * kMaxStride]); + + // Allocate 2 row buffers. + const int kRowSize = (dst_width + 15) & ~15; + align_buffer_64(row, kRowSize * 2); + uint8* rowptr = row; - int rowstride = kMaxStride; + int rowstride = kRowSize; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -672,6 +680,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height, dst_ptr += dst_stride; y += dy; } + free_aligned_buffer_64(row); } // Scale Plane to/from any dimensions, without interpolation. 
@@ -780,18 +789,17 @@ void ScalePlane(const uint8* src, int src_stride, return; } } - if (filtering == kFilterBox && src_width <= kMaxStride && - dst_height * 2 < src_height ) { + if (filtering == kFilterBox && dst_height * 2 < src_height ) { ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); return; } - if (filtering && dst_height > src_height && dst_width <= kMaxStride) { + if (filtering && dst_height > src_height) { ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } - if (filtering && src_width <= kMaxStride) { + if (filtering) { ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; diff --git a/source/scale_argb.cc b/source/scale_argb.cc index fb2222e2a..976418c37 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -87,10 +87,6 @@ static void ScaleARGBDown4Box(int /* src_width */, int /* src_height */, assert(dx == 65536 * 4); // Test scale factor of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. - assert(dst_width * 2 <= kMaxStride); - // TODO(fbarchard): Remove clip_src_width alignment checks. - SIMD_ALIGNED(uint8 row[kMaxStride * 2 + 16]); - // Advance to odd row, even column. src_argb += (y >> 16) * src_stride + (x >> 16) * 4; int row_stride = src_stride * (dy >> 16); @@ -109,14 +105,19 @@ static void ScaleARGBDown4Box(int /* src_width */, int /* src_height */, } #endif + // Allocate 2 rows of ARGB. 
+ const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; + align_buffer_64(row, kRowSize * 2); + for (int y = 0; y < dst_height; ++y) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, - row + kMaxStride, dst_width * 2); - ScaleARGBRowDown2(row, kMaxStride, dst_argb, dst_width); + row + kRowSize, dst_width * 2); + ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } + free_aligned_buffer_64(row); } // ScaleARGB ARGB Even @@ -179,9 +180,6 @@ static void ScaleARGBBilinearDown(int src_height, int clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4. src_argb += xl * 4; x -= (xl << 16); - assert(clip_src_width <= kMaxStride); - // TODO(fbarchard): Remove clip_src_width alignment checks. - SIMD_ALIGNED(uint8 row[kMaxStride + 16]); void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -239,6 +237,10 @@ static void ScaleARGBBilinearDown(int src_height, ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif + // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. + // Allocate a row of ARGB. + align_buffer_64(row, clip_src_width * 4); + const int max_y = (src_height - 1) << 16; for (int j = 0; j < dst_height; ++j) { if (y > max_y) { @@ -256,6 +258,7 @@ static void ScaleARGBBilinearDown(int src_height, dst_argb += dst_stride; y += dy; } + free_aligned_buffer_64(row); } // Scale ARGB up with bilinear interpolation. 
@@ -270,7 +273,6 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, assert(src_height > 0); assert(dst_width > 0); assert(dst_height > 0); - assert(dst_width * 4 <= kMaxStride); void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -348,9 +350,13 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, } int yi = y >> 16; const uint8* src = src_argb + yi * src_stride; - SIMD_ALIGNED(uint8 row[2 * kMaxStride]); + + // Allocate 2 rows of ARGB. + const int kRowSize = (dst_width * 4 + 15) & ~15; + align_buffer_64(row, kRowSize * 2); + uint8* rowptr = row; - int rowstride = kMaxStride; + int rowstride = kRowSize; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -385,6 +391,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, dst_argb += dst_stride; y += dy; } + free_aligned_buffer_64(row); } #ifdef YUVSCALEUP @@ -406,7 +413,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, assert(src_height > 0); assert(dst_width > 0); assert(dst_height > 0); - assert(dst_width * 4 <= kMaxStride); void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, @@ -514,12 +520,19 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, const uint8* src_row_y = src_y + yi * src_stride_y; const uint8* src_row_u = src_u + uv_yi * src_stride_u; const uint8* src_row_v = src_v + uv_yi * src_stride_v; - SIMD_ALIGNED(uint8 row[2 * kMaxStride]); - SIMD_ALIGNED(uint8 argb_row[kMaxStride * 4]); + + // Allocate 2 rows of ARGB. + const int kRowSize = (dst_width * 4 + 15) & ~15; + align_buffer_64(row, kRowSize * 2); + + // Allocate 1 row of ARGB for source conversion. + align_buffer_64(argb_row, src_width * 4); + uint8* rowptr = row; - int rowstride = kMaxStride; + int rowstride = kRowSize; int lasty = yi; + // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 
+ ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); if (src_height > 1) { src_row_y += src_stride_y; @@ -571,6 +584,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, dst_argb += dst_stride_argb; y += dy; } + free_aligned_buffer_64(row); + free_aligned_buffer_64(argb_row); } #endif @@ -696,14 +711,14 @@ static void ScaleARGB(const uint8* src, int src_stride, x, y, dy, 4, filtering); return; } - if (filtering && dy < 65536 && dst_width * 4 <= kMaxStride) { + if (filtering && dy < 65536) { ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); return; } - if (filtering && src_width * 4 < kMaxStride) { + if (filtering) { ScaleARGBBilinearDown(src_height, clip_width, clip_height, src_stride, dst_stride, src, dst,