From 788f757016c118f5d095b4cd4f0b157af0931169 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Mon, 11 Nov 2013 18:53:19 +0000 Subject: [PATCH] Linear interpolation. BUG=none TEST=*Linear* R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/3689004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@848 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/scale.h | 3 +- include/libyuv/version.h | 2 +- source/scale.cc | 242 +++++++++++++++++++++++++++++------ source/scale_argb.cc | 127 +++++++++++++++--- unit_test/scale_argb_test.cc | 25 ++-- unit_test/scale_test.cc | 29 +++-- 7 files changed, 345 insertions(+), 85 deletions(-) diff --git a/README.chromium b/README.chromium index 2ce0a625a..186cdcb3e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 847 +Version: 848 License: BSD License File: LICENSE diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index b1efc95d2..03a4f50ce 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -22,7 +22,8 @@ extern "C" { enum FilterMode { kFilterNone = 0, // Point sample; Fastest. kFilterBilinear = 1, // Faster than box, but lower quality scaling down. - kFilterBox = 2 // Highest quality. + kFilterBox = 2, // Highest quality. + kFilterLinear = 3 // Faster than bilinear, slower than None. }; // Scale a YUV plane. 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 19fe8dbd3..674592788 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 847 +#define LIBYUV_VERSION 848 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale.cc b/source/scale.cc index 3271013d5..6c708c795 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -37,19 +37,7 @@ static __inline int Half(int v) { // Note: Some SSE2 reference manuals // cpuvol1.pdf agner_instruction_tables.pdf 253666.pdf 253667.pdf -// Set the following flag to true to revert to only -// using the reference implementation ScalePlaneBox(), and -// NOT the optimized versions. Useful for debugging and -// when comparing the quality of the resulting YUV planes -// as produced by the optimized and non-optimized versions. -static bool use_reference_impl_ = false; - -LIBYUV_API -void SetUseReferenceImpl(bool use) { - use_reference_impl_ = use; -} - -// ScaleRowDown2Int also used by planar functions +// ScaleRowDown2Box also used by planar functions // NEON downscalers with interpolation. #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ @@ -208,6 +196,44 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, } } +// Blends 32x1 rectangle to 16x1. +// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. 
+__declspec(naked) __declspec(align(16)) +void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width) { + __asm { + mov eax, [esp + 4] // src_ptr + // src_stride + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + psrlw xmm5, 8 + + align 16 + wloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + lea eax, [eax + 32] + + movdqa xmm2, xmm0 // average columns (32 to 16 pixels) + psrlw xmm0, 8 + movdqa xmm3, xmm1 + psrlw xmm1, 8 + pand xmm2, xmm5 + pand xmm3, xmm5 + pavgw xmm0, xmm2 + pavgw xmm1, xmm3 + packuswb xmm0, xmm1 + + sub ecx, 16 + movdqa [edx], xmm0 + lea edx, [edx + 16] + jg wloop + + ret + } +} + // Blends 32x2 rectangle to 16x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. __declspec(naked) __declspec(align(16)) @@ -281,6 +307,44 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, } } +// Blends 32x1 rectangle to 16x1. +// Alignment requirement: none; src_ptr and dst_ptr are accessed with movdqu. +__declspec(naked) __declspec(align(16)) +void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t, + uint8* dst_ptr, int dst_width) { + __asm { + mov eax, [esp + 4] // src_ptr + // src_stride + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + psrlw xmm5, 8 + + align 16 + wloop: + movdqu xmm0, [eax] + movdqu xmm1, [eax + 16] + lea eax, [eax + 32] + + movdqa xmm2, xmm0 // average columns (32 to 16 pixels) + psrlw xmm0, 8 + movdqa xmm3, xmm1 + psrlw xmm1, 8 + pand xmm2, xmm5 + pand xmm3, xmm5 + pavgw xmm0, xmm2 + pavgw xmm1, xmm3 + packuswb xmm0, xmm1 + + sub ecx, 16 + movdqu [edx], xmm0 + lea edx, [edx + 16] + jg wloop + + ret + } +} + // Blends 32x2 rectangle to 16x1. // Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
__declspec(naked) __declspec(align(16)) @@ -838,6 +902,40 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, ); } +void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t, + uint8* dst_ptr, int dst_width) { + asm volatile ( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "psrlw $0x8,%%xmm0 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psrlw $0x8,%%xmm1 \n" + "pand %%xmm5,%%xmm2 \n" + "pand %%xmm5,%%xmm3 \n" + "pavgw %%xmm2,%%xmm0 \n" + "pavgw %%xmm3,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" // Every XMM register the loop writes. +#endif + ); +} + void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) { asm volatile ( @@ -903,6 +1001,40 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ); } +static void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t, + uint8* dst_ptr, int dst_width) { + asm volatile ( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + ".p2align 4 \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "psrlw $0x8,%%xmm0 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psrlw $0x8,%%xmm1 \n" + "pand %%xmm5,%%xmm2 \n" + "pand %%xmm5,%%xmm3 \n" + "pavgw %%xmm2,%%xmm0 \n" + "pavgw %%xmm3,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" // Every XMM register the loop writes. +#endif + ); +} + static void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr,
int dst_width) { @@ -1447,6 +1579,21 @@ static void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */, } } +void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t /* src_stride */, + uint8* dst, int dst_width) { + const uint8* s = src_ptr; + uint8* dend = dst + dst_width - 1; + while (dst < dend) { // Two outputs per pass; skipped when dst_width < 2. + dst[0] = (s[0] + s[1] + 1) >> 1; + dst[1] = (s[2] + s[3] + 1) >> 1; + dst += 2; + s += 4; + } + if (dst_width & 1) { // Odd width: one trailing output. + dst[0] = (s[0] + s[1] + 1) >> 1; + } +} + void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst, int dst_width) { const uint8* s = src_ptr; @@ -1685,7 +1832,9 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */, FilterMode filtering) { void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, int dst_width) = - filtering ? ScaleRowDown2Box_C : ScaleRowDown2_C; + filtering == kFilterNone ? ScaleRowDown2_C : + (filtering == kFilterLinear ? ScaleRowDown2Linear_C : + ScaleRowDown2Box_C); int row_stride = src_stride << 1; if (!filtering) { src_ptr += src_stride; // Point to odd rows. @@ -1698,12 +1847,15 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */, } #elif defined(HAS_SCALEROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering ? ScaleRowDown2Box_Unaligned_SSE2 : - ScaleRowDown2_Unaligned_SSE2; + ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 : + (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 : + ScaleRowDown2Box_Unaligned_SSE2); if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) && IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleRowDown2 = filtering ? ScaleRowDown2Box_SSE2 : ScaleRowDown2_SSE2; + ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : + (filtering == kFilterLinear ?
ScaleRowDown2Linear_SSE2 : + ScaleRowDown2Box_SSE2); } } #elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) @@ -1715,6 +1867,9 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */, } #endif + if (filtering == kFilterLinear) { + src_stride = 0; + } // TODO(fbarchard): Loop through source height to allow odd height. for (int y = 0; y < dst_height; ++y) { ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); @@ -1759,6 +1914,9 @@ static void ScalePlaneDown4(int /* src_width */, int /* src_height */, } #endif + if (filtering == kFilterLinear) { + src_stride = 0; + } for (int y = 0; y < dst_height; ++y) { ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += row_stride; @@ -1822,14 +1980,15 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */, } #endif + const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; for (int y = 0; y < dst_height - 2; y += 3) { - ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; - ScaleRowDown34_1(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; - ScaleRowDown34_0(src_ptr + src_stride, -src_stride, + ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; @@ -1837,7 +1996,7 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */, // Remainder 1 or 2 rows with last row vertically unfiltered if ((dst_height % 3) == 2) { - ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); @@ -1914,21 +2073,22 @@ static void ScalePlaneDown38(int /* src_width */, int /* src_height */, } #endif + const int filter_stride = 
(filtering == kFilterLinear) ? 0 : src_stride; for (int y = 0; y < dst_height - 2; y += 3) { - ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; - ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; - ScaleRowDown38_2(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } // Remainder 1 or 2 rows with last row vertically unfiltered if ((dst_height % 3) == 2) { - ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width); + ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); @@ -2080,7 +2240,8 @@ SAFEBUFFERS void ScalePlaneBilinear(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr) { + const uint8* src_ptr, uint8* dst_ptr, + FilterMode filtering) { assert(dst_width > 0); assert(dst_height > 0); assert(Abs(src_width) <= kMaxStride); @@ -2164,10 +2325,14 @@ void ScalePlaneBilinear(int src_width, int src_height, y = max_y; } int yi = y >> 16; - int yf = (y >> 8) & 255; const uint8* src = src_ptr + yi * src_stride; - InterpolateRow(row, src, src_stride, src_width, yf); - ScaleFilterCols_C(dst_ptr, row, dst_width, x, dx); + if (filtering == kFilterLinear) { + ScaleFilterCols_C(dst_ptr, src, dst_width, x, dx); + } else { + int yf = (y >> 8) & 255; + InterpolateRow(row, src, src_stride, src_width, yf); + ScaleFilterCols_C(dst_ptr, row, dst_width, x, dx); + } dst_ptr += dst_stride; y += dy; } @@ -2219,15 +2384,11 @@ static void ScalePlaneAnySize(int src_width, int src_height, src_stride, dst_stride, src_ptr, dst_ptr); } else { 
ScalePlaneBilinear(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src_ptr, dst_ptr); + src_stride, dst_stride, src_ptr, dst_ptr, filtering); } } // Scale plane down, any size -// -// This is an optimized version for scaling down a plane to any size. -// The current implementation is ~10 times faster compared to the -// reference implementation for e.g. XGA->LowResPAL static void ScalePlaneDown(int src_width, int src_height, int dst_width, int dst_height, @@ -2237,10 +2398,11 @@ static void ScalePlaneDown(int src_width, int src_height, if (!filtering || src_width > kMaxStride) { ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src_ptr, dst_ptr); - } else if (filtering == kFilterBilinear || dst_height * 2 > src_height) { + } else if (filtering == kFilterBilinear || filtering == kFilterLinear || + dst_height * 2 > src_height) { // between 1/2x and 1x use bilinear ScalePlaneBilinear(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src_ptr, dst_ptr); + src_stride, dst_stride, src_ptr, dst_ptr, filtering); } else { ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src_ptr, dst_ptr); @@ -2271,12 +2433,8 @@ void ScalePlane(const uint8* src, int src_stride, 0, 0, dy, 1, filtering); } else if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. - if (use_reference_impl_) { - // For testing, allow the optimized versions to be disabled. 
- ScalePlaneDown(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - } else if (4 * dst_width == 3 * src_width && - 4 * dst_height == 3 * src_height) { + if (4 * dst_width == 3 * src_width && + 4 * dst_height == 3 * src_height) { // optimized, 3/4 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 21ed8bcb9..f00dde26e 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -74,6 +74,36 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb, } } +// Blends 8x1 rectangle to 4x1. +// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. +__declspec(naked) __declspec(align(16)) +static void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, + ptrdiff_t /* src_stride */, + uint8* dst_argb, int dst_width) { + __asm { + mov eax, [esp + 4] // src_argb + // src_stride ignored + mov edx, [esp + 12] // dst_argb + mov ecx, [esp + 16] // dst_width + + align 16 + wloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + lea eax, [eax + 32] + movdqa xmm2, xmm0 + shufps xmm0, xmm1, 0x88 // even pixels + shufps xmm2, xmm1, 0xdd // odd pixels + pavgb xmm0, xmm2 + sub ecx, 4 + movdqa [edx], xmm0 + lea edx, [edx + 16] + jg wloop + + ret + } +} + // Blends 8x2 rectangle to 4x1. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
__declspec(naked) __declspec(align(16)) @@ -466,6 +496,35 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ); } +static void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, + ptrdiff_t /* src_stride */, + uint8* dst_argb, int dst_width) { + asm volatile ( + ".p2align 4 \n" + BUNDLEALIGN + "1: \n" + "movdqa " MEMACCESS(0) ",%%xmm0 \n" + "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" + "lea " MEMLEA(0x20,0) ",%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "sub $0x4,%2 \n" + "movdqa %%xmm0," MEMACCESS(1) " \n" + "lea " MEMLEA(0x10,1) ",%1 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2" // xmm2 holds the odd-pixel copy. +#endif + ); +} + static void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) { @@ -822,6 +881,19 @@ static void ScaleARGBRowDown2_C(const uint8* src_argb, } } +static void ScaleARGBRowDown2Linear_C(const uint8* src_argb, + ptrdiff_t /* src_stride */, + uint8* dst_argb, int dst_width) { + for (int x = 0; x < dst_width; ++x) { + dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; + dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; + dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1; + dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1; + src_argb += 8; + dst_argb += 4; + } +} + static void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) { for (int x = 0; x < dst_width; ++x) { @@ -930,13 +1002,16 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */, int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, uint8* dst_argb, int dst_width) = - filtering ? ScaleARGBRowDown2Box_C : ScaleARGBRowDown2_C; + filtering == kFilterNone ? ScaleARGBRowDown2_C : + (filtering == kFilterLinear ?
ScaleARGBRowDown2Linear_C : + ScaleARGBRowDown2Box_C); #if defined(HAS_SCALEARGBROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_SSE2 : - ScaleARGBRowDown2_SSE2; + ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : + (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : + ScaleARGBRowDown2Box_SSE2); } #elif defined(HAS_SCALEARGBROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && @@ -946,7 +1021,9 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */, } #endif - // TODO(fbarchard): Loop through source height to allow odd height. + if (filtering == kFilterLinear) { + src_stride = 0; + } for (int y = 0; y < dst_height; ++y) { ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); src_argb += row_stride; @@ -985,6 +1062,9 @@ static void ScaleARGBDownEven(int src_width, int src_height, } #endif + if (filtering == kFilterLinear) { + src_stride = 0; + } for (int y = 0; y < dst_height; ++y) { ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); src_argb += row_stride; @@ -998,7 +1078,8 @@ static void ScaleARGBBilinearDown(int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy) { + int x, int dx, int y, int dy, + FilterMode filtering) { assert(src_height > 0); assert(dst_width > 0); assert(dst_height > 0); @@ -1076,10 +1157,14 @@ static void ScaleARGBBilinearDown(int src_height, y = max_y; } int yi = y >> 16; - int yf = (y >> 8) & 255; const uint8* src = src_argb + yi * src_stride; - InterpolateRow(row, src, src_stride, clip_src_width, yf); - ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); + if (filtering == kFilterLinear) { + ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); + } 
else { + int yf = (y >> 8) & 255; + InterpolateRow(row, src, src_stride, clip_src_width, yf); + ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); + } dst_argb += dst_stride; y += dy; } @@ -1091,7 +1176,8 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy) { + int x, int dx, int y, int dy, + FilterMode filtering) { assert(src_width > 0); assert(src_height > 0); assert(dst_width > 0); @@ -1180,8 +1266,12 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, src += src_stride; } } - int yf = (y >> 8) & 255; - InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); + if (filtering == kFilterLinear) { + InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); + } else { + int yf = (y >> 8) & 255; + InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); + } dst_argb += dst_stride; y += dy; } @@ -1200,7 +1290,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, const uint8* src_u, const uint8* src_v, uint8* dst_argb, - int x, int dx, int y, int dy) { + int x, int dx, int y, int dy, + FilterMode filtering) { assert(src_width > 0); assert(src_height > 0); assert(dst_width > 0); @@ -1353,8 +1444,12 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, } } } - int yf = (y >> 8) & 255; - InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); + if (filtering == kFilterLinear) { + InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); + } else { + int yf = (y >> 8) & 255; + InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); + } dst_argb += dst_stride_argb; y += dy; } @@ -1424,14 +1519,14 @@ static void ScaleARGBAnySize(int src_width, int src_height, ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src_argb, dst_argb, - x, dx, y, dy); + x, dx, y, dy, filtering); return; } if (filtering && src_width * 4 
< kMaxStride) { ScaleARGBBilinearDown(src_height, clip_width, clip_height, src_stride, dst_stride, src_argb, dst_argb, - x, dx, y, dy); + x, dx, y, dy, filtering); return; } ScaleARGBSimple(src_width, src_height, clip_width, clip_height, diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index a75b8f31b..f04088b17 100644 --- a/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc @@ -213,18 +213,20 @@ static int ARGBClipTestFilter(int src_width, int src_height, // Test a scale factor with 2 filters. Expect unfiltered to be exact, but // filtering is different fixed point implementations for SSSE3, Neon and C. #define TEST_FACTOR(name, hfactor, vfactor) \ + TEST_FACTOR1(name, None, hfactor, vfactor, 2) \ + TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \ TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) // TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2. -// TEST_FACTOR(1, 1 / 1, 1 / 1) +TEST_FACTOR(1, 1 / 1, 1 / 1) TEST_FACTOR(2, 1 / 2, 1 / 2) TEST_FACTOR(4, 1 / 4, 1 / 4) -// TEST_FACTOR(8, 1 / 8, 1 / 8) -// TEST_FACTOR(16, 1 / 16, 1 / 16) -// TEST_FACTOR(2by3, 2 / 3, 2 / 3) +TEST_FACTOR(8, 1 / 8, 1 / 8) +TEST_FACTOR(16, 1 / 16, 1 / 16) +TEST_FACTOR(2by3, 2 / 3, 2 / 3) TEST_FACTOR(3by4, 3 / 4, 3 / 4) -// TEST_FACTOR(3by8, 3 / 8, 3 / 8) -// TEST_FACTOR(Vertical2by3, 1, 2 / 3) +TEST_FACTOR(3by8, 3 / 8, 3 / 8) +TEST_FACTOR(Vertical2by3, 1, 2 / 3) #undef TEST_FACTOR1 #undef TEST_FACTOR @@ -257,14 +259,15 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4) // Test scale to a specified size with all 3 filters. 
#define TEST_SCALETO(name, width, height) \ TEST_SCALETO1(name, width, height, None, 0) \ + TEST_SCALETO1(name, width, height, Linear, 2) \ TEST_SCALETO1(name, width, height, Bilinear, 2) TEST_SCALETO(ARGBScale, 640, 360) -TEST_SCALETO(DISABLED_ARGBScale, 853, 480) -TEST_SCALETO(DISABLED_ARGBScale, 1280, 720) -TEST_SCALETO(DISABLED_ARGBScale, 1280, 800) -TEST_SCALETO(DISABLED_ARGBScale, 1366, 768) -TEST_SCALETO(DISABLED_ARGBScale, 1920, 1080) +TEST_SCALETO(ARGBScale, 853, 480) +TEST_SCALETO(ARGBScale, 1280, 720) +TEST_SCALETO(ARGBScale, 1280, 800) +TEST_SCALETO(ARGBScale, 1366, 768) +TEST_SCALETO(ARGBScale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index d21615dda..2fa904bf8 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -141,23 +141,24 @@ static int TestFilter(int src_width, int src_height, EXPECT_LE(diff, max_diff); \ } -// Test a scale factor with all 3 filters. Expect unfiltered to be exact, but +// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but // filtering is different fixed point implementations for SSSE3, Neon and C. #define TEST_FACTOR(name, hfactor, vfactor) \ TEST_FACTOR1(name, None, hfactor, vfactor, 0) \ + TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \ TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) \ TEST_FACTOR1(name, Box, hfactor, vfactor, 2) \ // TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2. 
-// TEST_FACTOR(1, 1 / 1, 1 / 1) +TEST_FACTOR(1, 1 / 1, 1 / 1) TEST_FACTOR(2, 1 / 2, 1 / 2) TEST_FACTOR(4, 1 / 4, 1 / 4) -// TEST_FACTOR(8, 1 / 8, 1 / 8) -// TEST_FACTOR(16, 1 / 16, 1 / 16) -// TEST_FACTOR(2by3, 2 / 3, 2 / 3) +TEST_FACTOR(8, 1 / 8, 1 / 8) +TEST_FACTOR(16, 1 / 16, 1 / 16) +TEST_FACTOR(2by3, 2 / 3, 2 / 3) TEST_FACTOR(3by4, 3 / 4, 3 / 4) -// TEST_FACTOR(3by8, 3 / 8, 3 / 8) -// TEST_FACTOR(Vertical2by3, 1, 2 / 3) +TEST_FACTOR(3by8, 3 / 8, 3 / 8) +TEST_FACTOR(Vertical2by3, 1, 2 / 3) #undef TEST_FACTOR1 #undef TEST_FACTOR @@ -175,17 +176,19 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4) EXPECT_LE(diff, max_diff); \ } -// Test scale to a specified size with all 3 filters. +// Test scale to a specified size with all 4 filters. #define TEST_SCALETO(name, width, height) \ TEST_SCALETO1(name, width, height, None, 0) \ + TEST_SCALETO1(name, width, height, Linear, 0) \ + TEST_SCALETO1(name, width, height, Bilinear, 2) \ TEST_SCALETO1(name, width, height, Box, 2) TEST_SCALETO(Scale, 640, 360) -TEST_SCALETO(DISABLED_Scale, 853, 480) -TEST_SCALETO(DISABLED_Scale, 1280, 720) -TEST_SCALETO(DISABLED_Scale, 1280, 800) -TEST_SCALETO(DISABLED_Scale, 1366, 768) -TEST_SCALETO(DISABLED_Scale, 1920, 1080) +TEST_SCALETO(Scale, 853, 480) +TEST_SCALETO(Scale, 1280, 720) +TEST_SCALETO(Scale, 1280, 800) +TEST_SCALETO(Scale, 1366, 768) +TEST_SCALETO(Scale, 1920, 1080) #undef TEST_SCALETO1 #undef TEST_SCALETO