diff --git a/README.chromium b/README.chromium index 7f22b144e..d25641e28 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 693 +Version: 694 License: BSD License File: LICENSE diff --git a/include/libyuv/scale_argb.h b/include/libyuv/scale_argb.h index 48d5297fd..b6f510522 100644 --- a/include/libyuv/scale_argb.h +++ b/include/libyuv/scale_argb.h @@ -26,6 +26,7 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb, int dst_width, int dst_height, enum FilterMode filtering); +// Clipped scale takes destination rectangle coordinates for clip values. LIBYUV_API int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, int src_width, int src_height, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 5feb0d7af..ab0099cec 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 693 +#define LIBYUV_VERSION 694 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 06daeae18..dec1ae635 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -740,7 +740,10 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, + int x, int dx, int y, int dy, FilterMode filtering) { + assert(dx == 65536 * 2); // Test scale factor of 2. + assert(dy == 65536 * 2); void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) = filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C; @@ -758,6 +761,7 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */, ScaleARGBRowDown2_NEON; } #endif + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; // TODO(fbarchard): Loop through source height to allow odd height. 
for (int y = 0; y < dst_height; ++y) { @@ -770,11 +774,11 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */, // ScaleARGB ARGB Even // This is an optimized version for scaling down a ARGB to even // multiple of its original size. - static void ScaleARGBDownEven(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, + int x, int dx, int y, int dy, FilterMode filtering) { assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); @@ -794,41 +798,48 @@ static void ScaleARGBDownEven(int src_width, int src_height, ScaleARGBRowDownEven_NEON; } #endif - int src_step = src_width / dst_width; - // Adjust to point to center of box. - int row_step = src_height / dst_height; - int row_stride = row_step * src_stride; - src_argb += ((row_step >> 1) - 1) * src_stride + - ((Abs(src_step) >> 1) - 1) * 4; - // Negative src_width means horizontally mirror. - if (src_width < 0) { - src_argb += -src_step * (dst_width - 1) * 4; - } + int col_step = dx >> 16; + int row_stride = (dy >> 16) * src_stride; + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; for (int y = 0; y < dst_height; ++y) { - ScaleARGBRowDownEven(src_argb, src_stride, src_step, dst_argb, dst_width); + ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } } -// ScaleARGB ARGB to/from any dimensions, with bilinear -// interpolation. +// ScaleARGB ARGB to/from any dimensions, with bilinear interpolation. 
static void ScaleARGBBilinearDown(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb) { + const uint8* src_argb, uint8* dst_argb, + int x, int dx, int y, int dy) { + assert(src_width > 0); + assert(src_height > 0); assert(dst_width > 0); assert(dst_height > 0); - assert(Abs(src_width) * 4 <= kMaxStride); +#ifdef CLIP_ADJUST + // TODO(fbarchard): Adjust end points for alignment. + // Adjust + int xlast = x + (dst_width - 1) * dx; + int xl = (dx >= 0) ? x : xlast; + int xr = (dx >= 0) ? xlast : x; + int clip_src_width = (xr >> 16) - (xl >> 16) + 2; + src_argb -= (xl >> 16) * 4; + x -= (xl & 0xffff0000); + assert(clip_src_width * 4 <= kMaxStride); +#else + int clip_src_width = src_width; +#endif SIMD_ALIGNED(uint8 row[kMaxStride + 16]); void (*ScaleARGBFilterRows)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ARGBInterpolateRow_C; #if defined(HAS_ARGBINTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && Abs(src_width) >= 4) { + if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 4) { ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSE2; - if (IS_ALIGNED(Abs(src_width), 4)) { + if (IS_ALIGNED(clip_src_width, 4)) { ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSE2; if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { ScaleARGBFilterRows = ARGBInterpolateRow_SSE2; @@ -837,9 +848,9 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, } #endif #if defined(HAS_ARGBINTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && Abs(src_width) >= 4) { + if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 4) { ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSSE3; - if (IS_ALIGNED(Abs(src_width), 4)) { + if (IS_ALIGNED(clip_src_width, 4)) { ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSSE3; if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { ScaleARGBFilterRows = ARGBInterpolateRow_SSSE3; @@ -848,9 +859,9 @@ 
static void ScaleARGBBilinearDown(int src_width, int src_height, } #endif #if defined(HAS_ARGBINTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && Abs(src_width) >= 4) { + if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 4) { ScaleARGBFilterRows = ARGBInterpolateRow_Any_NEON; - if (IS_ALIGNED(Abs(src_width), 4)) { + if (IS_ALIGNED(clip_src_width, 4)) { ScaleARGBFilterRows = ARGBInterpolateRow_NEON; } } @@ -862,28 +873,6 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif - int dx = 0; - int dy = 0; - int x = 0; - int y = 0; - if (dst_width <= Abs(src_width)) { - dx = (Abs(src_width) << 16) / dst_width; - x = (dx >> 1) - 32768; - } else if (dst_width > 1) { - dx = ((Abs(src_width) - 1) << 16) / (dst_width - 1); - } - // Negative src_width means horizontally mirror. - if (src_width < 0) { - x += (dst_width - 1) * dx; - dx = -dx; - src_width = -src_width; - } - if (dst_height <= src_height) { - dy = (src_height << 16) / dst_height; - y = (dy >> 1) - 32768; - } else if (dst_height > 1) { - dy = ((src_height - 1) << 16) / (dst_height - 1); - } int maxy = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; for (int j = 0; j < dst_height; ++j) { if (y > maxy) { @@ -892,19 +881,21 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, int yi = y >> 16; int yf = (y >> 8) & 255; const uint8* src = src_argb + yi * src_stride; - ScaleARGBFilterRows(row, src, src_stride, src_width, yf); + ScaleARGBFilterRows(row, src, src_stride, clip_src_width, yf); ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); dst_argb += dst_stride; y += dy; } } -// ScaleARGB ARGB to/from any dimensions, with bilinear -// interpolation. +// ScaleARGB ARGB to/from any dimensions, with bilinear interpolation. 
static void ScaleARGBBilinearUp(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb) { + const uint8* src_argb, uint8* dst_argb, + int x, int dx, int y, int dy) { + assert(src_width > 0); + assert(src_height > 0); assert(dst_width > 0); assert(dst_height > 0); assert(dst_width * 4 <= kMaxStride); @@ -948,39 +939,16 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif - int dx = 0; - int dy = 0; - int x = 0; - int y = 0; - if (dst_width <= Abs(src_width)) { - dx = (Abs(src_width) << 16) / dst_width; - x = (dx >> 1) - 32768; - } else if (dst_width > 1) { - dx = ((Abs(src_width) - 1) << 16) / (dst_width - 1); - } - // Negative src_width means horizontally mirror. - if (src_width < 0) { - x += (dst_width - 1) * dx; - dx = -dx; - src_width = -src_width; - } - if (dst_height <= src_height) { - dy = (src_height << 16) / dst_height; - y = (dy >> 1) - 32768; - } else if (dst_height > 1) { - dy = ((src_height - 1) << 16) / (dst_height - 1); - } int maxy = (src_height > 1) ? 
((src_height - 1) << 16) - 1 : 0; if (y > maxy) { y = maxy; } int yi = y >> 16; - int yf = (y >> 8) & 255; const uint8* src = src_argb + yi * src_stride; SIMD_ALIGNED(uint8 row[2 * kMaxStride]); uint8* rowptr = row; int rowstride = kMaxStride; - int lasty = 0; + int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { @@ -991,13 +959,8 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, for (int j = 0; j < dst_height; ++j) { yi = y >> 16; - yf = (y >> 8) & 255; if (yi != lasty) { if (y <= maxy) { - y = maxy; - yi = y >> 16; - yf = (y >> 8) & 255; - } else { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); rowptr += rowstride; rowstride = -rowstride; @@ -1005,6 +968,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, src += src_stride; } } + int yf = (y >> 8) & 255; ScaleARGBFilterRows(dst_argb, rowptr, rowstride, dst_width, yf); dst_argb += dst_stride; y += dy; @@ -1013,7 +977,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, // Scales a single row of pixels using point sampling. // Code is adapted from libyuv bilinear yuv scaling, but with bilinear -// interpolation off, and argb pixels instead of yuv. +// interpolation off, and argb pixels instead of yuv. 
static void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) { const uint32* src = reinterpret_cast<const uint32*>(src_argb); @@ -1038,7 +1002,8 @@ static void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, static void ScaleARGBSimple(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb) { + const uint8* src_argb, uint8* dst_argb, + int x, int dx, int y, int dy) { void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) = ScaleARGBCols_C; #if defined(HAS_SCALEARGBCOLS_SSE2) @@ -1046,22 +1011,6 @@ static void ScaleARGBSimple(int src_width, int src_height, ScaleARGBCols = ScaleARGBCols_SSE2; } #endif - int dx = (Abs(src_width) << 16) / dst_width; - int dy = (src_height << 16) / dst_height; - int x = 0; - int y = 0; - if (dst_width <= Abs(src_width)) { - x = (dx >> 1) - 32768; - } - // Negative src_width means horizontally mirror. - if (src_width < 0) { - x += (dst_width - 1) * dx; - dx = -dx; - src_width = -src_width; - } - if (dst_height <= src_height) { - y = (dy >> 1) - 32768; - } for (int i = 0; i < dst_height; ++i) { ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, @@ -1072,38 +1021,95 @@ static void ScaleARGBSimple(int src_width, int src_height, } // ScaleARGB ARGB to/from any dimensions.
- static void ScaleARGBAnySize(int src_width, int src_height, int dst_width, int dst_height, + int clip_width, int clip_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, + int x, int dx, int y, int dy, FilterMode filtering) { if (!filtering || - (Abs(src_width) * 4 > kMaxStride && dst_width * 4 > kMaxStride)) { - ScaleARGBSimple(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src_argb, dst_argb); + (src_width * 4 > kMaxStride && dst_width * 4 > kMaxStride)) { + ScaleARGBSimple(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src_argb, dst_argb, + x, dx, y, dy); return; } - if (dst_height <= src_height || dst_width * 4 > kMaxStride) { - ScaleARGBBilinearDown(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src_argb, dst_argb); + if (dy >= 65536 || dst_width * 4 > kMaxStride) { + ScaleARGBBilinearDown(src_width, src_height, + clip_width, clip_height, + src_stride, dst_stride, src_argb, dst_argb, + x, dx, y, dy); } else { - ScaleARGBBilinearUp(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src_argb, dst_argb); + ScaleARGBBilinearUp(src_width, src_height, + clip_width, clip_height, + src_stride, dst_stride, src_argb, dst_argb, + x, dx, y, dy); } } // ScaleARGB a ARGB. // This function in turn calls a scaling function // suitable for handling the desired resolutions. - static void ScaleARGB(const uint8* src, int src_stride, int src_width, int src_height, uint8* dst, int dst_stride, int dst_width, int dst_height, + int clip_x, int clip_y, int clip_width, int clip_height, FilterMode filtering) { + // Negative src_height means invert the image. + if (src_height < 0) { + src_height = -src_height; + src = src + (src_height - 1) * src_stride; + src_stride = -src_stride; + } + // Initial source x/y coordinate and step values as 16.16 fixed point. 
+ int dx = 0; + int dy = 0; + int x = 0; + int y = 0; + if (filtering) { + // Scale step for bilinear sampling renders last pixel once for upsample. + if (dst_width <= Abs(src_width)) { + dx = (Abs(src_width) << 16) / dst_width; + x = (dx >> 1) - 32768; + } else if (dst_width > 1) { + dx = ((Abs(src_width) - 1) << 16) / (dst_width - 1); + } + if (dst_height <= src_height) { + dy = (src_height << 16) / dst_height; + y = (dy >> 1) - 32768; + } else if (dst_height > 1) { + dy = ((src_height - 1) << 16) / (dst_height - 1); + } + } else { + // Scale step for point sampling duplicates all pixels equally. + dx = (Abs(src_width) << 16) / dst_width; + dy = (src_height << 16) / dst_height; + if (dst_width <= Abs(src_width)) { + x = (dx >> 1) - 32768; + } + if (dst_height <= src_height) { + y = (dy >> 1) - 32768; + } + } + // Negative src_width means horizontally mirror. + if (src_width < 0) { + x += (dst_width - 1) * dx; + dx = -dx; + src_width = -src_width; + } + if (clip_x) { + x += clip_x * dx; + dst += clip_x * 4; + } + if (clip_y) { + y += clip_y * dy; + dst += clip_y * dst_stride; + } + #ifdef CPU_X86 - // environment variable overrides for testing. + // Environment variable overrides for testing. char* filter_override = getenv("LIBYUV_FILTER"); if (filter_override) { filtering = (FilterMode)atoi(filter_override); // NOLINT @@ -1111,23 +1117,29 @@ static void ScaleARGB(const uint8* src, int src_stride, #endif if (dst_width == src_width && dst_height == src_height) { // Straight copy. - ARGBCopy(src, src_stride, dst, dst_stride, dst_width, dst_height); + ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, + dst, dst_stride, clip_width, clip_height); return; } + // TODO(fbarchard): Allow different vertical scales. if (2 * dst_width == src_width && 2 * dst_height == src_height) { // Optimized 1/2. 
- ScaleARGBDown2(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + ScaleARGBDown2(src_width, src_height, + clip_width, clip_height, + src_stride, dst_stride, src, dst, + x, dx, y, dy, filtering); return; } + // TODO(fbarchard): Remove this divide, reusing dx and dy. int scale_down_x = src_width / dst_width; int scale_down_y = src_height / dst_height; if (dst_width * scale_down_x == src_width && dst_height * scale_down_y == src_height) { if (!(scale_down_x & 1) && !(scale_down_y & 1)) { - // Optimized even scale down. ie 4, 6, 8, 10x - ScaleARGBDownEven(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + // Optimized even scale down. ie 4, 6, 8, 10x. + ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, + x, dx, y, dy, filtering); return; } if ((scale_down_x & 1) && (scale_down_y & 1)) { @@ -1135,11 +1147,32 @@ static void ScaleARGB(const uint8* src, int src_stride, } } // Arbitrary scale up and/or down. 
- ScaleARGBAnySize(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + ScaleARGBAnySize(src_width, src_height, + dst_width, dst_height, + clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); +} +LIBYUV_API +int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + int dst_width, int dst_height, + int clip_x, int clip_y, int clip_width, int clip_height, + enum FilterMode filtering) { + if (!src_argb || src_width == 0 || src_height == 0 || + !dst_argb || dst_width <= 0 || dst_height <= 0 || + clip_x < 0 || clip_y < 0 || + (clip_x + clip_width) > dst_width || + (clip_y + clip_height) > dst_height) { + return -1; + } + ScaleARGB(src_argb, src_stride_argb, src_width, src_height, + dst_argb, dst_stride_argb, dst_width, dst_height, + clip_x, clip_y, clip_width, clip_height, filtering); + return 0; } -// ScaleARGB an ARGB image. +// Scale an ARGB image. LIBYUV_API int ARGBScale(const uint8* src_argb, int src_stride_argb, int src_width, int src_height, @@ -1150,15 +1183,9 @@ int ARGBScale(const uint8* src_argb, int src_stride_argb, !dst_argb || dst_width <= 0 || dst_height <= 0) { return -1; } - // Negative height means invert the image. - if (src_height < 0) { - src_height = -src_height; - src_argb = src_argb + (src_height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, dst_stride_argb, dst_width, dst_height, - filtering); + 0, 0, dst_width, dst_height, filtering); return 0; } diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index 6f3020190..947531f04 100644 --- a/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc @@ -105,6 +105,8 @@ static int ARGBTestFilter(int src_width, int src_height, return max_diff; } +// TODO(fbarchard): Consider TEST_P to iterate through test cases. 
+ TEST_F(libyuvTest, ARGBScaleDownBy2_None) { const int src_width = benchmark_width_; const int src_height = benchmark_height_; @@ -131,6 +133,32 @@ TEST_F(libyuvTest, ARGBScaleDownBy2_Bilinear) { EXPECT_LE(max_diff, 2); } +TEST_F(libyuvTest, ARGBScaleDownBy1_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width); + const int dst_height = Abs(src_height); + + int max_diff = ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_LE(max_diff, 0); +} + +TEST_F(libyuvTest, ARGBScaleDownBy1_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width); + const int dst_height = Abs(src_height); + + int max_diff = ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_LE(max_diff, 0); +} + TEST_F(libyuvTest, ARGBScaleDownBy4_None) { const int src_width = benchmark_width_; const int src_height = benchmark_height_; @@ -392,4 +420,416 @@ TEST_F(libyuvTest, ARGBScaleFrom640x360_Bilinear) { EXPECT_LE(max_diff, 2); } +static const int kTileX = 16; +static const int kTileY = 16; + +static int TileARGBScale(const uint8* src_argb, int src_stride_argb, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + int dst_width, int dst_height, + FilterMode filtering) { + for (int y = 0; y < dst_height; y += kTileY) { + for (int x = 0; x < dst_width; x += kTileX) { + int clip_width = kTileX; + if (x + clip_width > dst_width) { + clip_width = dst_width - x; + } + int clip_height = kTileY; + if (y + clip_height > dst_height) { + clip_height = dst_height - y; + } + int r = ARGBScaleClip(src_argb, src_stride_argb, + src_width, src_height, + dst_argb, dst_stride_argb, + dst_width, dst_height, + x, y, clip_width, clip_height, filtering); + if (r) { + return r; + } + } + } + return 0; +} + +static int 
ARGBClipTestFilter(int src_width, int src_height, + int dst_width, int dst_height, + FilterMode f, int benchmark_iterations) { + const int b = 128; + int src_argb_plane_size = (Abs(src_width) + b * 2) * + (Abs(src_height) + b * 2) * 4; + int src_stride_argb = (b * 2 + Abs(src_width)) * 4; + + align_buffer_64(src_argb, src_argb_plane_size) + memset(src_argb, 1, src_argb_plane_size); + + int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; + int dst_stride_argb = (b * 2 + dst_width) * 4; + + srandom(time(NULL)); + + int i, j; + for (i = b; i < (Abs(src_height) + b); ++i) { + for (j = b; j < (Abs(src_width) + b) * 4; ++j) { + src_argb[(i * src_stride_argb) + j] = (random() & 0xff); + } + } + + align_buffer_64(dst_argb_c, dst_argb_plane_size) + align_buffer_64(dst_argb_opt, dst_argb_plane_size) + memset(dst_argb_c, 2, dst_argb_plane_size); + memset(dst_argb_opt, 3, dst_argb_plane_size); + + // Do full image, no clipping. + double c_time = get_time(); + ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); + c_time = (get_time() - c_time); + + // Do tiled image, clipping scale to a tile at a time. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, + src_width, src_height, + dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, + dst_width, dst_height, f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + + // Report performance of Full vs Tiled. + printf("filter %d - %8d us Full - %8d us Tiled\n", + f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); + + // Compare full scaled image vs tiled image.
+ int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b * 4; j < (dst_width + b) * 4; ++j) { + int abs_diff = abs(dst_argb_c[(i * dst_stride_argb) + j] - + dst_argb_opt[(i * dst_stride_argb) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_64(dst_argb_c) + free_aligned_buffer_64(dst_argb_opt) + free_aligned_buffer_64(src_argb) + return max_diff; +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy1_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width); + const int dst_height = Abs(src_height); + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy1_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width); + const int dst_height = Abs(src_height); + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy2_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 2; + const int dst_height = Abs(src_height) / 2; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy2_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 2; + const int dst_height = Abs(src_height) / 2; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy4_None) { 
+ const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 4; + const int dst_height = Abs(src_height) / 4; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy4_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 4; + const int dst_height = Abs(src_height) / 4; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy5_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 5; + const int dst_height = Abs(src_height) / 5; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy5_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 5; + const int dst_height = Abs(src_height) / 5; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy8_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 8; + const int dst_height = Abs(src_height) / 8; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy8_Bilinear) { + const int src_width = benchmark_width_; + const int src_height 
= benchmark_height_; + const int dst_width = Abs(src_width) / 8; + const int dst_height = Abs(src_height) / 8; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy16_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 16; + const int dst_height = Abs(src_height) / 16; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy16_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) / 16; + const int dst_height = Abs(src_height) / 16; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy34_None) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) * 3 / 4; + const int dst_height = Abs(src_height) * 3 / 4; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy34_Bilinear) { + const int src_width = benchmark_width_; + const int src_height = benchmark_height_; + const int dst_width = Abs(src_width) * 3 / 4; + const int dst_height = Abs(src_height) * 3 / 4; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy38_None) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 
Abs(src_width) * 3 / 8; + int dst_height = Abs(src_height) * 3 / 8; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipDownBy38_Bilinear) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = Abs(src_width) * 3 / 8; + int dst_height = Abs(src_height) * 3 / 8; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipTo1366x768_None) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 1366; + int dst_height = 768; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipTo1366x768_Bilinear) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 1366; + int dst_height = 768; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + + +TEST_F(libyuvTest, ARGBScaleClipTo1280x720_None) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 1280; + int dst_height = 720; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipTo1280x720_Bilinear) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 1280; + int dst_height = 720; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipTo853x480_None) { 
+ int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 853; + int dst_height = 480; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipTo853x480_Bilinear) { + int src_width = benchmark_width_; + int src_height = benchmark_height_; + int dst_width = 853; + int dst_height = 480; + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipFrom640x360_None) { + int src_width = 640; + int src_height = 360; + int dst_width = Abs(benchmark_width_); + int dst_height = Abs(benchmark_height_); + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterNone, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + +TEST_F(libyuvTest, ARGBScaleClipFrom640x360_Bilinear) { + int src_width = 640; + int src_height = 360; + int dst_width = Abs(benchmark_width_); + int dst_height = Abs(benchmark_height_); + + int max_diff = ARGBClipTestFilter(src_width, src_height, + dst_width, dst_height, + kFilterBilinear, + benchmark_iterations_); + EXPECT_EQ(0, max_diff); +} + } // namespace libyuv diff --git a/util/convert.cc b/util/convert.cc index 599ca6267..6f1b1506c 100644 --- a/util/convert.cc +++ b/util/convert.cc @@ -146,6 +146,37 @@ void ParseOptions(int argc, const char* argv[]) { } } +static const int kTileX = 12; +static const int kTileY = 8; + +static int TileARGBScale(const uint8* src_argb, int src_stride_argb, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + int dst_width, int dst_height, + libyuv::FilterMode filtering) { + for (int y = 0; y < dst_height; y += kTileY) { + for (int x = 0; x < dst_width; x += kTileX) { + int clip_width = kTileX; + if (x + clip_width > dst_width) { + clip_width = dst_width - x; + } + 
int clip_height = kTileY; + if (y + clip_height > dst_height) { + clip_height = dst_height - y; + } + int r = libyuv::ARGBScaleClip(src_argb, src_stride_argb, + src_width, src_height, + dst_argb, dst_stride_argb, + dst_width, dst_height, + x, y, clip_width, clip_height, filtering); + if (r) { + return r; + } + } + } + return 0; +} + int main(int argc, const char* argv[]) { ParseOptions(argc, argv); @@ -217,11 +248,11 @@ int main(int argc, const char* argv[]) { break; for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { - libyuv::ARGBScale(ch_org, Abs(image_width) * 4, - image_width, image_height, - ch_dst, dst_width * 4, - dst_width, dst_height, - static_cast<libyuv::FilterMode>(filter)); + TileARGBScale(ch_org, Abs(image_width) * 4, + image_width, image_height, + ch_dst, dst_width * 4, + dst_width, dst_height, + static_cast<libyuv::FilterMode>(filter)); // Output scaled ARGB. if (strstr(argv[fileindex_rec + cur_rec], "_ARGB.")) {