From 461767d6d5c7ff2823db1b75daa8326afab54f86 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Mon, 15 Apr 2013 18:32:45 +0000 Subject: [PATCH] Bilinear upsample BUG=208 TEST=out\release\libyuv_unittest --gtest_filter=*ARGBScale*640* Review URL: https://webrtc-codereview.appspot.com/1303006 git-svn-id: http://libyuv.googlecode.com/svn/trunk@664 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 3 +- include/libyuv/row.h | 11 +-- include/libyuv/version.h | 2 +- source/scale_argb.cc | 187 ++++++++++++++++++++++++++++++----- unit_test/scale_argb_test.cc | 74 +++----------- util/convert.cc | 46 ++++++--- 6 files changed, 210 insertions(+), 113 deletions(-) diff --git a/README.chromium b/README.chromium index d6cf03036..84905cb55 100644 --- a/README.chromium +++ b/README.chromium @@ -1,7 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 663 -License: BSD +Version: 664 License File: LICENSE Description: diff --git a/include/libyuv/row.h b/include/libyuv/row.h index f95b87220..2c17a3585 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -19,12 +19,11 @@ extern "C" { #endif // TODO(fbarchard): Remove kMaxStride. -// Functions should allocate a single row buffer of this size on the stack. -// Functions that allocate more than one row buffer may fail or cause stack -// probe. -// This size is a retina Mac pixels of 32 bit ARGB. -// Functions may want less for 8 or 16 bit row buffers. -#define kMaxStride (2880 * 4) +#ifdef __arm__ +#define kMaxStride (1920 * 4) +#else +#define kMaxStride (4096 * 4) +#endif #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) #if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 73c363dfb..a932dd3a2 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 663 +#define LIBYUV_VERSION 664 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_argb.cc b/source/scale_argb.cc index fc580599b..31d966ccb 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -24,7 +24,6 @@ extern "C" { #endif // ARGB scaling uses bilinear or point, but not box filter. - #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) #define HAS_SCALEARGBROWDOWNEVEN_NEON @@ -1297,28 +1296,45 @@ static void ScaleARGBDownEven(int src_width, int src_height, // ScaleARGB ARGB to/from any dimensions, with bilinear // interpolation. - -static void ScaleARGBBilinear(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb) { +static void ScaleARGBBilinearDown(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_argb, uint8* dst_argb) { assert(dst_width > 0); assert(dst_height > 0); assert(src_width * 4 <= kMaxStride); SIMD_ALIGNED(uint8 row[kMaxStride + 16]); void (*ScaleARGBFilterRows)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - ScaleARGBFilterRows_C; -#if defined(HAS_SCALEARGBFILTERROWS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { - ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2; + ARGBInterpolateRow_C; +#if defined(HAS_ARGBINTERPOLATEROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && src_width >= 4) { + ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSE2; + if (IS_ALIGNED(src_width, 4)) { + ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSE2; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { + ScaleARGBFilterRows = ARGBInterpolateRow_SSE2; + } + } } #endif -#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(src_width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { - ScaleARGBFilterRows = ScaleARGBFilterRows_SSSE3; +#if defined(HAS_ARGBINTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 4) { + ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSSE3; + if (IS_ALIGNED(src_width, 4)) { + ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { + ScaleARGBFilterRows = ARGBInterpolateRow_SSSE3; + } + } + } +#endif +#if defined(HAS_ARGBINTERPOLATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && src_width >= 4) { + ScaleARGBFilterRows = ARGBInterpolateRow_Any_NEON; + if (IS_ALIGNED(src_width, 4)) { + ScaleARGBFilterRows = ARGBInterpolateRow_NEON; + } } #endif void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, @@ -1328,15 +1344,22 @@ static void ScaleARGBBilinear(int src_width, int src_height, ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif -#if defined(HAS_SCALEARGBFILTERROWS_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(src_width, 4)) { - ScaleARGBFilterRows = ScaleARGBFilterRows_NEON; + int dx = 0; + int dy = 0; + int x = 0; + int y = 0; + if (dst_width <= src_width) { + dx = (src_width << 16) / dst_width; + x = (dx >> 1) - 32768; + } else if (dst_width > 1) { + dx = ((src_width - 1) << 16) / (dst_width - 1); + } + if (dst_height <= src_height) { + dy = (src_height << 16) / dst_height; + y = (dy >> 1) - 32768; + } else if (dst_height > 1) { + dy = ((src_height - 1) << 16) / (dst_height - 1); } -#endif - int dx = (src_width << 16) / dst_width; - int dy = (src_height << 16) / dst_height; - int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1); - int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1); int maxy = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; for (int j = 0; j < dst_height; ++j) { if (y > maxy) { @@ -1352,6 +1375,112 @@ static void ScaleARGBBilinear(int src_width, int src_height, } } +// ScaleARGB ARGB to/from any dimensions, with bilinear +// interpolation. +static void ScaleARGBBilinearUp(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_argb, uint8* dst_argb) { + assert(dst_width > 0); + assert(dst_height > 0); + assert(dst_width * 4 <= kMaxStride); + void (*ScaleARGBFilterRows)(uint8* dst_argb, const uint8* src_argb, + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = + ARGBInterpolateRow_C; +#if defined(HAS_ARGBINTERPOLATEROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { + ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSE2; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSE2; + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleARGBFilterRows = ARGBInterpolateRow_SSE2; + } + } + } +#endif +#if defined(HAS_ARGBINTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { + ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSSE3; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSSE3; + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleARGBFilterRows = ARGBInterpolateRow_SSSE3; + } + } + } +#endif +#if defined(HAS_ARGBINTERPOLATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { + ScaleARGBFilterRows = ARGBInterpolateRow_Any_NEON; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterRows = ARGBInterpolateRow_NEON; + } + } +#endif + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, + int dst_width, int x, int dx) = ScaleARGBFilterCols_C; +#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; + } +#endif + int dx = 0; + int dy = 0; + int x = 0; + int y = 0; + if (dst_width <= src_width) { + dx = (src_width << 16) / dst_width; + x = (dx >> 1) - 32768; + } else if (dst_width > 1) { + dx = ((src_width - 1) << 16) / (dst_width - 1); + } + if (dst_height <= src_height) { + dy = (src_height << 16) / dst_height; + y = (dy >> 1) - 32768; + } else if (dst_height > 1) { + dy = ((src_height - 1) << 16) / (dst_height - 1); + } + int maxy = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; + if (y > maxy) { + y = maxy; + } + int yi = y >> 16; + int yf = (y >> 8) & 255; + const uint8* src = src_argb + yi * src_stride; + SIMD_ALIGNED(uint8 row[2 * kMaxStride]); + uint8* rowptr = row; + int rowstride = kMaxStride; + int lasty = 0; + + ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); + if (src_height > 1) { + src += src_stride; + } + ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); + src += src_stride; + + for (int j = 0; j < dst_height; ++j) { + yi = y >> 16; + yf = (y >> 8) & 255; + if (yi != lasty) { + if (y <= maxy) { + y = maxy; + yi = y >> 16; + yf = (y >> 8) & 255; + } else { + ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); + rowptr += rowstride; + rowstride = -rowstride; + lasty = yi; + src += src_stride; + } + } + ScaleARGBFilterRows(dst_argb, rowptr, rowstride, dst_width, yf); + dst_argb += dst_stride; + y += dy; + } +} + // Scales a single row of pixels using point sampling. // Code is adapted from libyuv bilinear yuv scaling, but with bilinear // interpolation off, and argb pixels instead of yuv. @@ -1406,12 +1535,18 @@ static void ScaleARGBAnySize(int src_width, int src_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, FilterMode filtering) { - if (!filtering || (src_width * 4 > kMaxStride)) { + if (!filtering || + (src_width * 4 > kMaxStride && dst_width * 4 > kMaxStride)) { ScaleARGBSimple(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src_argb, dst_argb); + return; + } + if (dst_height <= src_height || dst_width * 4 > kMaxStride) { + ScaleARGBBilinearDown(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src_argb, dst_argb); } else { - ScaleARGBBilinear(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src_argb, dst_argb); + ScaleARGBBilinearUp(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src_argb, dst_argb); } } diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index 224985b70..21b812be1 100644 --- a/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc @@ -123,20 +123,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy2_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); -} - -TEST_F(libyuvTest, ARGBScaleDownBy2_Box) { - const int src_width = benchmark_width_; - const int src_height = benchmark_height_; - const int dst_width = src_width / 2; - const int dst_height = src_height / 2; - - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - kFilterBox, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleDownBy4_None) { @@ -162,20 +149,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy4_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); -} - -TEST_F(libyuvTest, ARGBScaleDownBy4_Box) { - const int src_width = benchmark_width_; - const int src_height = benchmark_height_; - const int dst_width = src_width / 4; - const int dst_height = src_height / 4; - - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - kFilterBox, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleDownBy5_None) { @@ -201,20 +175,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy5_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); -} - -TEST_F(libyuvTest, ARGBScaleDownBy5_Box) { - const int src_width = benchmark_width_; - const int src_height = benchmark_height_; - const int dst_width = src_width / 5; - const int dst_height = src_height / 5; - - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - kFilterBox, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleDownBy8_None) { @@ -240,20 +201,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy8_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); -} - -TEST_F(libyuvTest, ARGBScaleDownBy8_Box) { - const int src_width = benchmark_width_; - const int src_height = benchmark_height_; - const int dst_width = src_width / 8; - const int dst_height = src_height / 8; - - int max_diff = ARGBTestFilter(src_width, src_height, - dst_width, dst_height, - kFilterBox, - benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleDownBy16_None) { @@ -279,7 +227,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy16_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleDownBy34_None) { @@ -305,7 +253,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy34_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleDownBy38_None) { @@ -331,7 +279,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy38_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleTo1366x768_None) { @@ -357,7 +305,7 @@ TEST_F(libyuvTest, ARGBScaleTo1366x768_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } @@ -384,7 +332,7 @@ TEST_F(libyuvTest, ARGBScaleTo1280x720_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleTo853x480_None) { @@ -410,7 +358,7 @@ TEST_F(libyuvTest, ARGBScaleTo853x480_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } TEST_F(libyuvTest, ARGBScaleFrom640x360_None) { @@ -436,7 +384,7 @@ TEST_F(libyuvTest, ARGBScaleFrom640x360_Bilinear) { dst_width, dst_height, kFilterBilinear, benchmark_iterations_); - EXPECT_LE(max_diff, 1); + EXPECT_LE(max_diff, 2); } } // namespace libyuv diff --git a/util/convert.cc b/util/convert.cc index 5c9077696..f90645df9 100644 --- a/util/convert.cc +++ b/util/convert.cc @@ -32,6 +32,7 @@ int fileindex_rec = 0; // argv argument contains the reconstructed file name. int num_rec = 0; // Number of reconstructed images. int num_skip_org = 0; // Number of frames to skip in original. int num_frames = 0; // Number of frames to convert. +int filter = 1; // Bilinear filter for scaling. // Parse PYUV format. ie name.1920x800_24Hz_P420.yuv bool ExtractResolutionFromFilename(const char* name, @@ -58,6 +59,7 @@ void PrintHelp(const char * program) { " resolution (ie. " "name.1920x800_24Hz_P420.yuv)\n"); printf(" -d .... specify destination resolution.\n"); + printf(" -f ............ 0 = point, 1 = bilinear (default).\n"); printf(" -skip ....... Number of frame to skip of src_argb\n"); printf(" -frames .......... Number of frames to convert\n"); printf(" -v ..................... verbose\n"); @@ -82,6 +84,8 @@ void ParseOptions(int argc, const char* argv[]) { num_skip_org = atoi(argv[++c]); // NOLINT } else if (!strcmp(argv[c], "-frames") && c + 1 < argc) { num_frames = atoi(argv[++c]); // NOLINT + } else if (!strcmp(argv[c], "-f") && c + 1 < argc) { + filter = atoi(argv[++c]); // NOLINT } else if (argv[c][0] == '-') { fprintf(stderr, "Unknown option. %s\n", argv[c]); } else if (fileindex_org == 0) { @@ -202,8 +206,9 @@ int main(int argc, const char* argv[]) { if (num_frames && number_of_frames >= num_frames) break; - size_t bytes_org = fread(ch_org, sizeof(uint8), org_size, file_org); - if (bytes_org < total_size) + size_t bytes_org = fread(ch_org, sizeof(uint8), + static_cast(org_size), file_org); + if (bytes_org < static_cast(org_size)) break; for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { @@ -211,20 +216,31 @@ int main(int argc, const char* argv[]) { image_width, image_height, ch_dst, dst_width * 4, dst_width, dst_height, - libyuv::kFilterBilinear); + static_cast(filter)); - int half_width = (dst_width + 1) / 2; - int half_height = (dst_height + 1) / 2; - libyuv::ARGBToI420(ch_dst, dst_width * 4, - ch_rec, dst_width, - ch_rec + dst_width * dst_height, half_width, - ch_rec + dst_width * dst_height + - half_width * half_height, half_width, - dst_width, dst_height); - size_t bytes_rec = fwrite(ch_rec, sizeof(uint8), - total_size, file_rec[cur_rec]); - if (bytes_rec < total_size) - break; + // Output scaled ARGB. + if (strstr(argv[fileindex_rec + cur_rec], "_ARGB.")) { + size_t bytes_rec = fwrite(ch_dst, sizeof(uint8), + static_cast(dst_size), + file_rec[cur_rec]); + if (bytes_rec < static_cast(dst_size)) + break; + } else { + // Output YUV. + int half_width = (dst_width + 1) / 2; + int half_height = (dst_height + 1) / 2; + libyuv::ARGBToI420(ch_dst, dst_width * 4, + ch_rec, dst_width, + ch_rec + dst_width * dst_height, half_width, + ch_rec + dst_width * dst_height + + half_width * half_height, half_width, + dst_width, dst_height); + size_t bytes_rec = fwrite(ch_rec, sizeof(uint8), + static_cast(total_size), + file_rec[cur_rec]); + if (bytes_rec < static_cast(total_size)) + break; + } if (verbose) { printf("%5d", number_of_frames);