From 0e9b515f5348a453cd97578b4668b3c330973298 Mon Sep 17 00:00:00 2001
From: "frkoenig@google.com"
Date: Sat, 29 Oct 2011 00:26:17 +0000
Subject: [PATCH] Neon 38 downscaler.

Fixed up the unit tests for the filters to use the same image
generation and comparison code.
Added timing output for the scale runs.

Review URL: http://webrtc-codereview.appspot.com/244016

git-svn-id: http://libyuv.googlecode.com/svn/trunk@48 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 source/scale.cc         | 250 +++++++++++++++++++++++++++++
 unit_test/scale_test.cc | 339 ++++++++++++++++++----------------------
 2 files changed, 402 insertions(+), 187 deletions(-)

diff --git a/source/scale.cc b/source/scale.cc
index 6be7f4ed8..15544a2a7 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -286,6 +286,244 @@ static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride,
   );
 }
 
+#define HAS_SCALEROWDOWN38_NEON
+const uint8 shuf38[16] __attribute__ ((aligned(16))) =
+  { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
+const uint8 shuf38_2[16] __attribute__ ((aligned(16))) =
+  { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
+const unsigned short mult38_div6[8] __attribute__ ((aligned(16))) =
+  { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
+    65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
+const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) =
+  { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
+    65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
+
+// 32 -> 12
+static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
+                                uint8* dst_ptr, int dst_width) {
+  __asm__ volatile
+  (
+    "vld1.u8    {q3}, [%3]                   \n"
+    "1:                                      \n"
+    "vld1.u8    {d0, d1, d2, d3}, [%0]!      \n"
+    "vtbl.u8    d4, {d0, d1, d2, d3}, d6     \n"
+    "vtbl.u8    d5, {d0, d1, d2, d3}, d7     \n"
+    "vst1.u8    {d4}, [%1]!                  \n"
+    "vst1.u32   {d5[0]}, [%1]!               \n"
+    "subs       %2, #12                      \n"
+    "bhi        1b                           \n"
+    : "+r"(src_ptr),   // %0
+      "+r"(dst_ptr),   // %1
+      "+r"(dst_width)  // %2
+    : "r"(shuf38)      // %3
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
+  );
+}
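The shuf38 table above encodes the 3/8 point-sampling pattern that the two vtbl lookups apply: each output pixel i samples source pixel (8 * i + 2) / 3, so every group of 3 outputs consumes 8 inputs, stepping 3, 3, 2. A minimal C sketch of the same step, illustrative only and not part of the patch (the helper name and the uint8_t typing are assumptions; like the asm loop, it assumes dst_width is a multiple of 12):

    #include <stdint.h>

    // C model of ScaleRowDown38_NEON's table lookup: point-sample 32
    // source pixels down to 12 per iteration using the shuf38 indices.
    static void ScaleRowDown38_C_Sketch(const uint8_t* src_ptr,
                                        uint8_t* dst_ptr, int dst_width) {
      static const uint8_t kShuf38[12] =
          { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30 };
      for (int x = 0; x < dst_width; x += 12) {
        for (int i = 0; i < 12; ++i)
          dst_ptr[x + i] = src_ptr[kShuf38[i]];  // same indices as the vtbl table
        src_ptr += 32;  // 32 input pixels consumed per 12 outputs
      }
    }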
+
+// 32x3 -> 12x1
+static void ScaleRowDown38_3_Int_NEON(const uint8* src_ptr, int src_stride,
+                                      uint8* dst_ptr, int dst_width) {
+  __asm__ volatile
+  (
+    "vld1.u16   {q4}, [%4]                   \n"
+    "vld1.u8    {q5}, [%5]                   \n"
+    "vld1.u8    {q8}, [%6]                   \n"
+    "add        r4, %0, %3, lsl #1           \n"
+    "add        %3, %0                       \n"
+    "1:                                      \n"
+
+    // d0 = 00 40 01 41 02 42 03 43
+    // d1 = 10 50 11 51 12 52 13 53
+    // d2 = 20 60 21 61 22 62 23 63
+    // d3 = 30 70 31 71 32 72 33 73
+    "vld4.u8    {d0, d1, d2, d3}, [%0]!      \n"
+    "vld4.u8    {d4, d5, d6, d7}, [%3]!      \n"
+    "vld4.u8    {d12, d13, d14, d15}, [r4]!  \n"
+
+    // Shuffle the input data around to align it so that
+    //  adjacent data can be added: 0,1 - 2,3 - 4,5 - 6,7.
+    // d0 = 00 10 01 11 02 12 03 13
+    // d1 = 40 50 41 51 42 52 43 53
+    "vtrn.u8    d0, d1                       \n"
+    "vtrn.u8    d4, d5                       \n"
+    "vtrn.u8    d12, d13                     \n"
+
+    // d2 = 20 30 21 31 22 32 23 33
+    // d3 = 60 70 61 71 62 72 63 73
+    "vtrn.u8    d2, d3                       \n"
+    "vtrn.u8    d6, d7                       \n"
+    "vtrn.u8    d14, d15                     \n"
+
+    // d0 = 00+10 01+11 02+12 03+13
+    // d2 = 40+50 41+51 42+52 43+53
+    "vpaddl.u8  q0, q0                       \n"
+    "vpaddl.u8  q2, q2                       \n"
+    "vpaddl.u8  q6, q6                       \n"
+
+    // d3 = 60+70 61+71 62+72 63+73
+    "vpaddl.u8  d3, d3                       \n"
+    "vpaddl.u8  d7, d7                       \n"
+    "vpaddl.u8  d15, d15                     \n"
+
+    // combine source lines
+    "vadd.u16   q0, q2                       \n"
+    "vadd.u16   q0, q6                       \n"
+    "vadd.u16   d4, d3, d7                   \n"
+    "vadd.u16   d4, d15                      \n"
+
+    // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+    //             + s[6 + st * 1] + s[7 + st * 1]
+    //             + s[6 + st * 2] + s[7 + st * 2]) / 6
+    "vqrdmulh.s16 q2, q4                     \n"
+    "vmovn.u16  d4, q2                       \n"
+
+    // Shuffle the 2,3 registers around so that 2 can be added to
+    //  the 0,1 register and 3 can be added to the 4,5 register.
+    //  This requires expanding from u8 to u16 as the 0,1 and 4,5
+    //  registers are already expanded.  Then do transposes to get
+    //  the data aligned.
+    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+    "vmovl.u8   q1, d2                       \n"
+    "vmovl.u8   q3, d6                       \n"
+    "vmovl.u8   q7, d14                      \n"
+
+    // combine source lines
+    "vadd.u16   q1, q3                       \n"
+    "vadd.u16   q1, q7                       \n"
+
+    // d4 = xx 20 xx 30 xx 22 xx 32
+    // d5 = xx 21 xx 31 xx 23 xx 33
+    "vtrn.u32   d2, d3                       \n"
+
+    // d4 = xx 20 xx 21 xx 22 xx 23
+    // d5 = xx 30 xx 31 xx 32 xx 33
+    "vtrn.u16   d2, d3                       \n"
+
+    // 0+1+2, 3+4+5
+    "vadd.u16   q0, q1                       \n"
+
+    // Need to divide, but can't downshift as the value isn't a
+    //  power of 2.  So multiply by 65536 / n and take the upper
+    //  16 bits.
+    "vqrdmulh.s16 q0, q8                     \n"
+
+    // Align for table lookup; vtbl requires registers to be
+    //  adjacent.
+    "vmov.u8    d2, d4                       \n"
+
+    "vtbl.u8    d3, {d0, d1, d2}, d10        \n"
+    "vtbl.u8    d4, {d0, d1, d2}, d11        \n"
+
+    "vst1.u8    {d3}, [%1]!                  \n"
+    "vst1.u32   {d4[0]}, [%1]!               \n"
+    "subs       %2, #12                      \n"
+    "bhi        1b                           \n"
+    : "+r"(src_ptr),       // %0
+      "+r"(dst_ptr),       // %1
+      "+r"(dst_width),     // %2
+      "+r"(src_stride)     // %3
+    : "r"(mult38_div6),    // %4
+      "r"(shuf38_2),       // %5
+      "r"(mult38_div9)     // %6
+    : "r4", "q0", "q1", "q2", "q3", "q4",
+      "q5", "q6", "q7", "q8", "memory", "cc"
+  );
+}
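The vqrdmulh.s16 instructions above are the division step: the accumulated sums must be divided by 9 (3x3 boxes) or 6 (2x3 boxes), neither of which is a power of two, so the code multiplies by a reciprocal instead. Because vqrdmulh returns the rounded, doubled high half of the product, the constants are 65536/18 and 65536/12 rather than 65536/9 and 65536/6; the doubling supplies the missing factor of two. A C model of the operation (illustrative only; saturation omitted and the helper name is an assumption):

    #include <stdint.h>

    // Model of vqrdmulh.s16: rounding, doubling multiply that keeps the
    // high 16 bits of the product (saturation omitted for brevity).
    static int16_t QRDMulH(int16_t a, int16_t b) {
      return (int16_t)((2 * (int32_t)a * (int32_t)b + (1 << 15)) >> 16);
    }

    // Example: nine pixels of value 90 sum to 810, and
    // QRDMulH(810, 65536 / 18) == 810 / 9 == 90.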
+
+// 32x2 -> 12x1
+static void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr, int src_stride,
+                                      uint8* dst_ptr, int dst_width) {
+  __asm__ volatile
+  (
+    "vld1.u16   {q4}, [%4]                   \n"
+    "vld1.u8    {q5}, [%5]                   \n"
+    "add        %3, %0                       \n"
+    "1:                                      \n"
+
+    // d0 = 00 40 01 41 02 42 03 43
+    // d1 = 10 50 11 51 12 52 13 53
+    // d2 = 20 60 21 61 22 62 23 63
+    // d3 = 30 70 31 71 32 72 33 73
+    "vld4.u8    {d0, d1, d2, d3}, [%0]!      \n"
+    "vld4.u8    {d4, d5, d6, d7}, [%3]!      \n"
+
+    // Shuffle the input data around to align it so that
+    //  adjacent data can be added: 0,1 - 2,3 - 4,5 - 6,7.
+    // d0 = 00 10 01 11 02 12 03 13
+    // d1 = 40 50 41 51 42 52 43 53
+    "vtrn.u8    d0, d1                       \n"
+    "vtrn.u8    d4, d5                       \n"
+
+    // d2 = 20 30 21 31 22 32 23 33
+    // d3 = 60 70 61 71 62 72 63 73
+    "vtrn.u8    d2, d3                       \n"
+    "vtrn.u8    d6, d7                       \n"
+
+    // d0 = 00+10 01+11 02+12 03+13
+    // d2 = 40+50 41+51 42+52 43+53
+    "vpaddl.u8  q0, q0                       \n"
+    "vpaddl.u8  q2, q2                       \n"
+
+    // d3 = 60+70 61+71 62+72 63+73
+    "vpaddl.u8  d3, d3                       \n"
+    "vpaddl.u8  d7, d7                       \n"
+
+    // combine source lines
+    "vadd.u16   q0, q2                       \n"
+    "vadd.u16   d4, d3, d7                   \n"
+
+    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+    "vqrshrn.u16 d4, q2, #2                  \n"
+
+    // Shuffle the 2,3 registers around so that 2 can be added to
+    //  the 0,1 register and 3 can be added to the 4,5 register.
+    //  This requires expanding from u8 to u16 as the 0,1 and 4,5
+    //  registers are already expanded.  Then do transposes to get
+    //  the data aligned.
+    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+    "vmovl.u8   q1, d2                       \n"
+    "vmovl.u8   q3, d6                       \n"
+
+    // combine source lines
+    "vadd.u16   q1, q3                       \n"
+
+    // d4 = xx 20 xx 30 xx 22 xx 32
+    // d5 = xx 21 xx 31 xx 23 xx 33
+    "vtrn.u32   d2, d3                       \n"
+
+    // d4 = xx 20 xx 21 xx 22 xx 23
+    // d5 = xx 30 xx 31 xx 32 xx 33
+    "vtrn.u16   d2, d3                       \n"
+
+    // 0+1+2, 3+4+5
+    "vadd.u16   q0, q1                       \n"
+
+    // Need to divide, but can't downshift as the value isn't a
+    //  power of 2.  So multiply by 65536 / n and take the upper
+    //  16 bits.
+    "vqrdmulh.s16 q0, q4                     \n"
+
+    // Align for table lookup; vtbl requires registers to be
+    //  adjacent.
+    "vmov.u8    d2, d4                       \n"
+
+    "vtbl.u8    d3, {d0, d1, d2}, d10        \n"
+    "vtbl.u8    d4, {d0, d1, d2}, d11        \n"
+
+    "vst1.u8    {d3}, [%1]!                  \n"
+    "vst1.u32   {d4[0]}, [%1]!               \n"
+    "subs       %2, #12                      \n"
+    "bhi        1b                           \n"
+    : "+r"(src_ptr),       // %0
+      "+r"(dst_ptr),       // %1
+      "+r"(dst_width),     // %2
+      "+r"(src_stride)     // %3
+    : "r"(mult38_div6),    // %4
+      "r"(shuf38_2)        // %5
+    : "q0", "q1", "q2", "q3", "q4", "q5", "memory", "cc"
+  );
+}
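Unwinding the transposes, each iteration of ScaleRowDown38_2_Int_NEON reduces 8 source columns across 2 rows to 3 output pixels: two 3x2 box averages, then one 2x2 box average (hence mult38_div6 for the first two and the #2 right-shift for the third). A rough scalar equivalent, ignoring the exact rounding of the NEON instructions (helper name ours, not the library's C fallback):

    #include <stdint.h>

    // Scalar model of ScaleRowDown38_2_Int_NEON: two rows in, one row
    // out, 3 output pixels per 8 input columns.
    static void ScaleRowDown38_2_Sketch(const uint8_t* s,  // row 0
                                        const uint8_t* t,  // row 1 (s + stride)
                                        uint8_t* dst_ptr, int dst_width) {
      for (int x = 0; x < dst_width; x += 3) {
        dst_ptr[x + 0] = (s[0] + s[1] + s[2] + t[0] + t[1] + t[2]) / 6;
        dst_ptr[x + 1] = (s[3] + s[4] + s[5] + t[3] + t[4] + t[5]) / 6;
        dst_ptr[x + 2] = (s[6] + s[7] + t[6] + t[7]) / 4;
        s += 8;
        t += 8;
      }
    }

The three-row variant has the same structure with 3x3 and 2x3 boxes, dividing by 9 and 6 via mult38_div9 and mult38_div6. These rounding differences between the scalar and NEON paths are exactly why the unit test below tolerates a max difference of 2.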
+
 /**
  * SSE2 downscalers with interpolation.
  *
@@ -3064,6 +3302,18 @@ static void ScalePlaneDown38(int src_width, int src_height,
                              uint8* dst_ptr, int dst_width);
   void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride,
                            uint8* dst_ptr, int dst_width);
+#if defined(HAS_SCALEROWDOWN38_NEON)
+  if (libyuv::TestCpuFlag(libyuv::kCpuHasNEON) &&
+      (dst_width % 24 == 0)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_NEON;
+      ScaleRowDown38_2 = ScaleRowDown38_NEON;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Int_NEON;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Int_NEON;
+    }
+  } else
+#endif
 #if defined(HAS_SCALEROWDOWN38_SSSE3)
   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
       (dst_width % 24 == 0) && (src_stride % 16 == 0) &&
diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc
index 1d41668cf..44774a54f 100644
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -27,142 +27,37 @@ using namespace libyuv;
     free(var##_mem); \
     var = 0;
 
-TEST_F(libyuvTest, ScaleDownBy4) {
-  int b = 128;
-  int src_width = 1280;
-  int src_height = 720;
-  int src_width_uv = (src_width + 1) >> 1;
-  int src_height_uv = (src_height + 1) >> 1;
+#ifdef WIN32
 
-  int src_y_plane_size = (src_width + (2 * b)) * (src_height + (2 * b));
-  int src_uv_plane_size = (src_width_uv + (2 * b)) * (src_height_uv + (2 * b));
-
-  int src_stride_y = 2 * b + src_width;
-  int src_stride_uv = 2 * b + src_width_uv;
-
-  align_buffer_16(src_y, src_y_plane_size)
-  align_buffer_16(src_u, src_uv_plane_size)
-  align_buffer_16(src_v, src_uv_plane_size)
-
-  int dst_width = src_width >> 2;
-  int dst_height = src_height >> 2;
-
-  int dst_width_uv = (dst_width + 1) >> 1;
-  int dst_height_uv = (dst_height + 1) >> 1;
-
-  int dst_y_plane_size = (dst_width + (2 * b)) * (dst_height + (2 * b));
-  int dst_uv_plane_size = (dst_width_uv + (2 * b)) * (dst_height_uv + (2 * b));
-
-  int dst_stride_y = 2 * b + dst_width;
-  int dst_stride_uv = 2 * b + dst_width_uv;
-
-  align_buffer_16(dst_y, dst_y_plane_size)
-  align_buffer_16(dst_u, dst_uv_plane_size)
-  align_buffer_16(dst_v, dst_uv_plane_size)
-
-  // create an image with random data reoccurring in 4x4 grid.  When the image
-  // is filtered all the values should be the same.
-  srandom(time(NULL));
-
-  uint8 block_data[16];
-
-  int i, j;
-
-  // Pulling 16 random numbers there is an infinitesimally small
-  // chance that they are all 0.  Then the output will be all 0.
-  // Output buffer is filled with 0, want to make sure that after the
-  // filtering something went into the output buffer.
-  // Avoid this by setting one of the values to 128.  Also set the
-  // random data to at least 1 for when point sampling to prevent
-  // output all being 0.
-  block_data[0] = 128;
-
-  for (i = 1; i < 16; i++)
-    block_data[i] = (random() & 0xfe) + 1;
-
-  for (i = b; i < (src_height + b); i += 4) {
-    for (j = b; j < (src_width + b); j += 4) {
-      uint8 *ptr = src_y + (i * src_stride_y) + j;
-      int k, l;
-      for (k = 0; k < 4; ++k)
-        for (l = 0; l < 4; ++l)
-          ptr[k + src_stride_y * l] = block_data[k + 4 * l];
-    }
-  }
-
-  for (i = 1; i < 16; i++)
-    block_data[i] = (random() & 0xfe) + 1;
-
-  for (i = b; i < (src_height_uv + b); i += 4) {
-    for (j = b; j < (src_width_uv + b); j += 4) {
-      uint8 *ptru = src_u + (i * src_stride_uv) + j;
-      uint8 *ptrv = src_v + (i * src_stride_uv) + j;
-      int k, l;
-      for (k = 0; k < 4; ++k)
-        for (l = 0; l < 4; ++l) {
-          ptru[k + src_stride_uv * l] = block_data[k + 4 * l];
-          ptrv[k + src_stride_uv * l] = block_data[k + 4 * l];
-        }
-    }
-  }
-
-  int f;
-  int err = 0;
-
-  // currently three filter modes, defined as FilterMode in scale.h
-  for (f = 0; f < 3; ++f) {
-    I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
-              src_u + (src_stride_uv * b) + b, src_stride_uv,
-              src_v + (src_stride_uv * b) + b, src_stride_uv,
-              src_width, src_height,
-              dst_y + (dst_stride_y * b) + b, dst_stride_y,
-              dst_u + (dst_stride_uv * b) + b, dst_stride_uv,
-              dst_v + (dst_stride_uv * b) + b, dst_stride_uv,
-              dst_width, dst_height,
-              static_cast<FilterMode>(f));
-
-    int value = dst_y[(dst_stride_y * b) + b];
-
-    // catch the case that the output buffer is all 0
-    if (value == 0)
-      ++err;
-
-    for (i = b; i < (dst_height + b); ++i) {
-      for (j = b; j < (dst_width + b); ++j) {
-        if (value != dst_y[(i * dst_stride_y) + j])
-          ++err;
-      }
-    }
-
-    value = dst_u[(dst_stride_uv * b) + b];
-
-    if (value == 0)
-      ++err;
-
-    for (i = b; i < (dst_height_uv + b); ++i) {
-      for (j = b; j < (dst_width_uv + b); ++j) {
-        if (value != dst_u[(i * dst_stride_uv) + j])
-          ++err;
-        if (value != dst_v[(i * dst_stride_uv) + j])
-          ++err;
-      }
-    }
-  }
-
-  free_aligned_buffer_16(src_y)
-  free_aligned_buffer_16(src_u)
-  free_aligned_buffer_16(src_v)
-  free_aligned_buffer_16(dst_y)
-  free_aligned_buffer_16(dst_u)
-  free_aligned_buffer_16(dst_v)
-
-  EXPECT_EQ(0, err);
+#include <windows.h>
+static double get_time()
+{
+  LARGE_INTEGER t, f;
+  QueryPerformanceCounter(&t);
+  QueryPerformanceFrequency(&f);
+  return double(t.QuadPart)/double(f.QuadPart);
 }
 
-TEST_F(libyuvTest, ScaleDownBy34) {
+#else
+
+#include <sys/time.h>
+#include <time.h>
+
+static double get_time()
+{
+  struct timeval t;
+  struct timezone tzp;
+  gettimeofday(&t, &tzp);
+  return t.tv_sec + t.tv_usec*1e-6;
+}
+
+#endif
+
+static int TestFilter(int src_width, int src_height,
+                      int dst_width, int dst_height,
+                      FilterMode f) {
+
   int b = 128;
-  int src_width = 1280;
-  int src_height = 720;
   int src_width_uv = (src_width + 1) >> 1;
   int src_height_uv = (src_height + 1) >> 1;
@@ -176,9 +71,6 @@ TEST_F(libyuvTest, ScaleDownBy34) {
   align_buffer_16(src_u, src_uv_plane_size)
   align_buffer_16(src_v, src_uv_plane_size)
 
-  int dst_width = (src_width*3) >> 2;
-  int dst_height = (src_height*3) >> 2;
-
   int dst_width_uv = (dst_width + 1) >> 1;
   int dst_height_uv = (dst_height + 1) >> 1;
@@ -205,20 +97,18 @@ TEST_F(libyuvTest, ScaleDownBy34) {
     }
   }
 
-  int f;
-  int err = 0;
+  const int runs = 128;
+  align_buffer_16(dst_y_c, dst_y_plane_size)
+  align_buffer_16(dst_u_c, dst_uv_plane_size)
+  align_buffer_16(dst_v_c, dst_uv_plane_size)
+  align_buffer_16(dst_y_opt, dst_y_plane_size)
+  align_buffer_16(dst_u_opt, dst_uv_plane_size)
+  align_buffer_16(dst_v_opt, dst_uv_plane_size)
 
-  // currently three filter modes, defined as FilterMode in scale.h
-  for (f = 0; f < 3; ++f) {
-    int max_diff = 0;
-    align_buffer_16(dst_y_c, dst_y_plane_size)
-    align_buffer_16(dst_u_c, dst_uv_plane_size)
-    align_buffer_16(dst_v_c, dst_uv_plane_size)
-    align_buffer_16(dst_y_opt, dst_y_plane_size)
-    align_buffer_16(dst_u_opt, dst_uv_plane_size)
-    align_buffer_16(dst_v_opt, dst_uv_plane_size)
+  libyuv::MaskCpuFlags(0);
+  double c_time = get_time();
 
-    libyuv::MaskCpuFlagsForTest(0);
+  for (i = 0; i < runs; ++i)
     I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
               src_u + (src_stride_uv * b) + b, src_stride_uv,
               src_v + (src_stride_uv * b) + b, src_stride_uv,
               src_width, src_height,
               dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
               dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv,
               dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv,
-              dst_width, dst_height,
-              static_cast<FilterMode>(f));
+              dst_width, dst_height, f);
 
-    libyuv::MaskCpuFlagsForTest(-1);
+  c_time = (get_time() - c_time) / runs;
+
+  libyuv::MaskCpuFlags(-1);
+  double opt_time = get_time();
+
+  for (i = 0; i < runs; ++i)
     I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
               src_u + (src_stride_uv * b) + b, src_stride_uv,
               src_v + (src_stride_uv * b) + b, src_stride_uv,
               src_width, src_height,
               dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
               dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv,
               dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv,
-              dst_width, dst_height,
-              static_cast<FilterMode>(f));
+              dst_width, dst_height, f);
 
-    // C version may be a little off from the optimized. Order of
-    // operations may introduce rounding somewhere. So do a difference
-    // of the buffers and look to see that the max difference isn't
-    // over 2.
-    for (i = b; i < (dst_height + b); ++i) {
-      for (j = b; j < (dst_width + b); ++j) {
-        int abs_diff = abs(dst_y_c[(i * dst_stride_y) + j] -
-                           dst_y_opt[(i * dst_stride_y) + j]);
-        if (abs_diff > max_diff)
-          max_diff = abs_diff;
-      }
+  opt_time = (get_time() - opt_time) / runs;
+
+  printf ("filter %d - %8d us c - %8d us opt\n",
+          f, (int)(c_time*1e6), (int)(opt_time*1e6));
+  ::testing::Test::RecordProperty("C", (int)c_time);
+  ::testing::Test::RecordProperty("Opt", (int)opt_time);
+
+  // C version may be a little off from the optimized.  Order of
+  // operations may introduce rounding somewhere.  So do a difference
+  // of the buffers and look to see that the max difference isn't
+  // over 2.
+  int err = 0;
+  int max_diff = 0;
+  for (i = b; i < (dst_height + b); ++i) {
+    for (j = b; j < (dst_width + b); ++j) {
+      int abs_diff = abs(dst_y_c[(i * dst_stride_y) + j] -
+                         dst_y_opt[(i * dst_stride_y) + j]);
+      if (abs_diff > max_diff)
+        max_diff = abs_diff;
     }
+  }
 
-    for (i = b; i < (dst_height_uv + b); ++i) {
-      for (j = b; j < (dst_width_uv + b); ++j) {
-        int abs_diff = abs(dst_u_c[(i * dst_stride_uv) + j] -
-                           dst_u_opt[(i * dst_stride_uv) + j]);
-        if (abs_diff > max_diff)
-          max_diff = abs_diff;
-        abs_diff = abs(dst_v_c[(i * dst_stride_uv) + j] -
-                       dst_v_opt[(i * dst_stride_uv) + j]);
-        if (abs_diff > max_diff)
-          max_diff = abs_diff;
-
-      }
-    }
-
-    if (max_diff > 2)
-      err++;
-
-    free_aligned_buffer_16(dst_y_c)
-    free_aligned_buffer_16(dst_u_c)
-    free_aligned_buffer_16(dst_v_c)
-    free_aligned_buffer_16(dst_y_opt)
-    free_aligned_buffer_16(dst_u_opt)
-    free_aligned_buffer_16(dst_v_opt)
   }
+
+  for (i = b; i < (dst_height_uv + b); ++i) {
+    for (j = b; j < (dst_width_uv + b); ++j) {
+      int abs_diff = abs(dst_u_c[(i * dst_stride_uv) + j] -
+                         dst_u_opt[(i * dst_stride_uv) + j]);
+      if (abs_diff > max_diff)
+        max_diff = abs_diff;
+      abs_diff = abs(dst_v_c[(i * dst_stride_uv) + j] -
+                     dst_v_opt[(i * dst_stride_uv) + j]);
+      if (abs_diff > max_diff)
+        max_diff = abs_diff;
+
+    }
+  }
+
+  if (max_diff > 2)
+    err++;
+
+  free_aligned_buffer_16(dst_y_c)
+  free_aligned_buffer_16(dst_u_c)
+  free_aligned_buffer_16(dst_v_c)
+  free_aligned_buffer_16(dst_y_opt)
+  free_aligned_buffer_16(dst_u_opt)
+  free_aligned_buffer_16(dst_v_opt)
+
   free_aligned_buffer_16(src_y)
   free_aligned_buffer_16(src_u)
   free_aligned_buffer_16(src_v)
 
+  return err;
+}
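TestFilter's I420Scale calls are somewhat obscured by the border offsets it adds for overrun detection; stripped of those, the call is just plane pointers and strides. A minimal sketch (the wrapper is hypothetical and not part of the patch; it assumes tightly packed planes whose strides equal their widths):

    // Hypothetical helper: scale a tightly packed I420 frame with the
    // plane strides implied by the widths.
    static void ScaleI420(const uint8* src_y, const uint8* src_u,
                          const uint8* src_v, int src_width, int src_height,
                          uint8* dst_y, uint8* dst_u, uint8* dst_v,
                          int dst_width, int dst_height, FilterMode f) {
      I420Scale(src_y, src_width,             // Y plane and stride
                src_u, (src_width + 1) >> 1,  // U plane and stride
                src_v, (src_width + 1) >> 1,  // V plane and stride
                src_width, src_height,
                dst_y, dst_width,
                dst_u, (dst_width + 1) >> 1,
                dst_v, (dst_width + 1) >> 1,
                dst_width, dst_height, f);
    }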
+
+TEST_F(libyuvTest, ScaleDownBy2) {
+
+  const int src_width = 1280;
+  const int src_height = 720;
+  const int dst_width = src_width >> 1;
+  const int dst_height = src_height >> 1;
+  int err = 0;
+
+  for (int f = 0; f < 3; ++f)
+    err += TestFilter (src_width, src_height,
+                       dst_width, dst_height,
+                       static_cast<FilterMode>(f));
+
+  EXPECT_EQ(0, err);
+}
+
+TEST_F(libyuvTest, ScaleDownBy4) {
+
+  const int src_width = 1280;
+  const int src_height = 720;
+  const int dst_width = src_width >> 2;
+  const int dst_height = src_height >> 2;
+  int err = 0;
+
+  for (int f = 0; f < 3; ++f)
+    err += TestFilter (src_width, src_height,
+                       dst_width, dst_height,
+                       static_cast<FilterMode>(f));
+
+  EXPECT_EQ(0, err);
+}
+
+TEST_F(libyuvTest, ScaleDownBy34) {
+
+  const int src_width = 1280;
+  const int src_height = 720;
+  const int dst_width = (src_width*3) >> 2;
+  const int dst_height = (src_height*3) >> 2;
+  int err = 0;
+
+  for (int f = 0; f < 3; ++f)
+    err += TestFilter (src_width, src_height,
+                       dst_width, dst_height,
+                       static_cast<FilterMode>(f));
+
+  EXPECT_EQ(0, err);
+}
+
+TEST_F(libyuvTest, ScaleDownBy38) {
+  int src_width = 1280;
+  int src_height = 720;
+  int dst_width = (src_width*3) >> 3;
+  int dst_height = (src_height*3) >> 3;
+
+  int err = 0;
+
+  for (int f = 0; f < 3; ++f)
+    err += TestFilter (src_width, src_height,
+                       dst_width, dst_height,
+                       static_cast<FilterMode>(f));
 
   EXPECT_EQ(0, err);
 }
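With the shared TestFilter helper in place, covering a further ratio is only a few lines. For instance, a hypothetical 1/8 test, not part of this patch, would follow the same pattern:

    // Hypothetical example: additional scale factors reuse the shared
    // image generation, timing and comparison code in TestFilter.
    TEST_F(libyuvTest, ScaleDownBy8) {
      const int src_width = 1280;
      const int src_height = 720;
      const int dst_width = src_width >> 3;
      const int dst_height = src_height >> 3;
      int err = 0;

      for (int f = 0; f < 3; ++f)
        err += TestFilter(src_width, src_height,
                          dst_width, dst_height,
                          static_cast<FilterMode>(f));

      EXPECT_EQ(0, err);
    }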