diff --git a/README.chromium b/README.chromium index 0fe897610..660a3b088 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 255 +Version: 256 License: BSD License File: LICENSE diff --git a/include/libyuv.h b/include/libyuv.h index 649167188..d44557378 100644 --- a/include/libyuv.h +++ b/include/libyuv.h @@ -20,6 +20,7 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/scale.h" +#include "libyuv/scale_argb.h" #include "libyuv/version.h" #include "libyuv/video_common.h" diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index 7fb421c06..be7da448a 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -66,14 +66,6 @@ int ScaleOffset(const uint8* src, int src_width, int src_height, uint8* dst, int dst_width, int dst_height, int dst_yoffset, bool interpolate); -typedef void (*ARGBBlendRow)(const uint8* src_argb0, - const uint8* src_argb1, - uint8* dst_argb, int width); - -// Get function to Alpha Blend ARGB pixels and store to destination. -ARGBBlendRow GetARGBBlend(uint8* dst_argb, int dst_stride_argb, int width); - - // For testing, allow disabling of optimizations. void SetUseReferenceImpl(bool use); diff --git a/include/libyuv/scale_argb.h b/include/libyuv/scale_argb.h new file mode 100644 index 000000000..575119c9c --- /dev/null +++ b/include/libyuv/scale_argb.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ +#define INCLUDE_LIBYUV_SCALE_ARGB_H_ + +#include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// Supported filtering +enum FilterMode { + kFilterNone = 0, // Point sample; Fastest + kFilterBilinear = 1, // Faster than box, but lower quality scaling down. 
+ kFilterBox = 2 // Highest quality +}; + +int ARGBScale(const uint8* src_argb, int src_stride_argb, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + int dst_width, int dst_height, + FilterMode filtering); + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ diff --git a/include/libyuv/version.h b/include/libyuv/version.h index facc89116..95d57ddb6 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 255 +#define LIBYUV_VERSION 256 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/libyuv.gyp b/libyuv.gyp index 45c3ad29e..6a89e5c98 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -56,6 +56,7 @@ 'source/row_posix.cc', 'source/row_win.cc', 'source/scale.cc', + 'source/scale_argb.cc', 'source/video_common.cc', ], }, diff --git a/libyuv_test.gyp b/libyuv_test.gyp index 52e11fdc1..64fa95d09 100644 --- a/libyuv_test.gyp +++ b/libyuv_test.gyp @@ -27,6 +27,7 @@ 'unit_test/planar_test.cc', 'unit_test/rotate_test.cc', 'unit_test/scale_test.cc', + 'unit_test/scale_argb_test.cc', 'unit_test/unit_test.cc', ], 'conditions': [ diff --git a/source/convertfrom.cc b/source/convertfrom.cc deleted file mode 100644 index 3b78a44f7..000000000 --- a/source/convertfrom.cc +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// TODO(fbarchard): Remove once builds have switched to convert_from -#include "convert_from.cc" - diff --git a/source/scale.cc b/source/scale.cc index 044d2ebe8..46cda7d07 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -23,12 +23,6 @@ namespace libyuv { extern "C" { #endif -#if defined(_MSC_VER) -#define ALIGN16(var) __declspec(align(16)) var -#else -#define ALIGN16(var) var __attribute__((aligned(16))) -#endif - // Note: A Neon reference manual // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html // Note: Some SSE2 reference manuals @@ -571,12 +565,12 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr, (defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) #if defined(_MSC_VER) -#define TALIGN16(t, var) __declspec(align(16)) t _ ## var +#define TSIMD_ALIGNED(t, var) __declspec(align(16)) t _ ## var #elif defined(__i386__) && \ (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) -#define TALIGN16(t, var) t var __attribute__((aligned(16))) +#define TSIMD_ALIGNED(t, var) t var __attribute__((aligned(16))) #else -#define TALIGN16(t, var) t _ ## var __attribute__((aligned(16))) +#define TSIMD_ALIGNED(t, var) t _ ## var __attribute__((aligned(16))) #endif #if defined(__APPLE__) && defined(__i386__) @@ -598,77 +592,77 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr, #endif // Offsets for source bytes 0 to 9 -extern "C" TALIGN16(const uint8, shuf0[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shuf0[16]) = { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 }; // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. 
-extern "C" TALIGN16(const uint8, shuf1[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shuf1[16]) = { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 }; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -extern "C" TALIGN16(const uint8, shuf2[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shuf2[16]) = { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 }; // Offsets for source bytes 0 to 10 -extern "C" TALIGN16(const uint8, shuf01[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shuf01[16]) = { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 }; // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -extern "C" TALIGN16(const uint8, shuf11[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shuf11[16]) = { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 }; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -extern "C" TALIGN16(const uint8, shuf21[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shuf21[16]) = { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 }; // Coefficients for source bytes 0 to 10 -extern "C" TALIGN16(const uint8, madd01[16]) = +extern "C" TSIMD_ALIGNED(const uint8, madd01[16]) = { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 }; // Coefficients for source bytes 10 to 21 -extern "C" TALIGN16(const uint8, madd11[16]) = +extern "C" TSIMD_ALIGNED(const uint8, madd11[16]) = { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 }; // Coefficients for source bytes 21 to 31 -extern "C" TALIGN16(const uint8, madd21[16]) = +extern "C" TSIMD_ALIGNED(const uint8, madd21[16]) = { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 }; // Coefficients for source bytes 21 to 31 -extern "C" TALIGN16(const int16, round34[8]) = +extern "C" TSIMD_ALIGNED(const int16, round34[8]) = { 2, 2, 2, 2, 2, 2, 2, 2 }; -extern "C" TALIGN16(const uint8, kShuf38a[16]) = +extern "C" TSIMD_ALIGNED(const uint8, kShuf38a[16]) = { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; -extern "C" TALIGN16(const uint8, kShuf38b[16]) = +extern "C" TSIMD_ALIGNED(const uint8, kShuf38b[16]) = { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 }; // Arrange words 0,3,6 into 0,1,2 -extern "C" TALIGN16(const uint8, shufac0[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shufac0[16]) = { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; // Arrange words 0,3,6 into 3,4,5 -extern "C" TALIGN16(const uint8, shufac3[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shufac3[16]) = { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 }; // Scaling values for boxes of 3x3 and 2x3 -extern "C" TALIGN16(const uint16, scaleac3[8]) = +extern "C" TSIMD_ALIGNED(const uint16, scaleac3[8]) = { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 }; // Arrange first value for pixels 0,1,2,3,4,5 -extern "C" TALIGN16(const uint8, shufab0[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shufab0[16]) = { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 }; // Arrange second value for pixels 0,1,2,3,4,5 -extern "C" TALIGN16(const uint8, shufab1[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shufab1[16]) = { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 }; // Arrange third value for pixels 0,1,2,3,4,5 -extern "C" TALIGN16(const uint8, shufab2[16]) = +extern "C" TSIMD_ALIGNED(const uint8, shufab2[16]) = { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; // Scaling values for boxes of 3x2 and 2x2 -extern "C" TALIGN16(const 
uint16, scaleab2[8]) = +extern "C" TSIMD_ALIGNED(const uint16, scaleab2[8]) = { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; #endif @@ -3052,7 +3046,7 @@ static void ScaleRowDown8_C(const uint8* src_ptr, int, // uses ScaleRowDown8_C instead. static void ScaleRowDown8Int_C(const uint8* src_ptr, int src_stride, uint8* dst, int dst_width) { - ALIGN16(uint8 src_row[kMaxRow12 * 2]); + SIMD_ALIGNED(uint8 src_row[kMaxRow12 * 2]); assert(dst_width <= kMaxOutputWidth); ScaleRowDown4Int_C(src_ptr, src_stride, src_row, dst_width * 2); ScaleRowDown4Int_C(src_ptr + src_stride * 4, src_stride, @@ -3171,7 +3165,7 @@ static const int kMaxInputWidth = 2560; static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { assert((dst_width % 3 == 0) && (dst_width > 0)); - ALIGN16(uint8 row[kMaxInputWidth]); + SIMD_ALIGNED(uint8 row[kMaxInputWidth]); ScaleFilterRows_SSE2(row, src_ptr, src_stride, dst_width * 4 / 3, 256 / 4); ScaleFilterCols34_C(dst_ptr, row, dst_width); } @@ -3180,7 +3174,7 @@ static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_1_Int_SSE2(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width) { assert((dst_width % 3 == 0) && (dst_width > 0)); - ALIGN16(uint8 row[kMaxInputWidth]); + SIMD_ALIGNED(uint8 row[kMaxInputWidth]); ScaleFilterRows_SSE2(row, src_ptr, src_stride, dst_width * 4 / 3, 256 / 2); ScaleFilterCols34_C(dst_ptr, row, dst_width); } @@ -3648,7 +3642,7 @@ static void ScalePlaneBox(int src_width, int src_height, dst += dst_stride; } } else { - ALIGN16(uint16 row[kMaxInputWidth]); + SIMD_ALIGNED(uint16 row[kMaxInputWidth]); void (*ScaleAddRows)(const uint8* src_ptr, int src_stride, uint16* dst_ptr, int src_width, int src_height)= ScaleAddRows_C; @@ -3737,7 +3731,7 @@ void ScalePlaneBilinear(int src_width, int src_height, src_stride, dst_stride, src_ptr, dst_ptr); } else { - ALIGN16(uint8 row[kMaxInputWidth + 1]); + SIMD_ALIGNED(uint8 row[kMaxInputWidth + 1]); void (*ScaleFilterRows)(uint8* dst_ptr, const uint8* src_ptr, int src_stride, int dst_width, int source_y_fraction) = diff --git a/source/scale_argb.cc b/source/scale_argb.cc new file mode 100644 index 000000000..64196815d --- /dev/null +++ b/source/scale_argb.cc @@ -0,0 +1,823 @@ +/* + * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/scale.h" + +#include <assert.h> +#include <string.h> +#include <stdlib.h> // For getenv() + +#include "libyuv/cpu_id.h" +#include "libyuv/planar_functions.h" // For CopyARGB +#include "source/row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// ARGB scaling uses bilinear or point, but not box filter. + +/** + * SSE2 downscalers with bilinear interpolation. + */ + +#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) + +#define HAS_SCALEARGBROWDOWN2_SSE2 +// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) +// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16)) +static void ScaleARGBRowDown2_SSE2(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + __asm { + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width + + align 16 + wloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + lea eax, [eax + 32] + shufps xmm0, xmm1, 0x88 + sub ecx, 4 + movdqa [edx], xmm0 + lea edx, [edx + 16] + jg wloop + + ret + } +} + +// Blends 8x2 rectangle to 4x1. +// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. +__declspec(naked) __declspec(align(16)) +void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + __asm { + push esi + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width + + align 16 + wloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + movdqa xmm2, [eax + esi] + movdqa xmm3, [eax + esi + 16] + lea eax, [eax + 32] + pavgb xmm0, xmm2 // average rows + pavgb xmm1, xmm3 + + movdqa xmm2, xmm0 // average columns (32 to 16 pixels) + shufps xmm0, xmm1, 0x88 // even pixels + shufps xmm2, xmm1, 0xdd // odd pixels + pavgb xmm0, xmm2 + sub ecx, 4 + movdqa [edx], xmm0 + lea edx, [edx + 16] + jg wloop + + pop esi + ret + } +} + +// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version. +#define HAS_SCALEARGBFILTERROWS_SSE2 +__declspec(naked) __declspec(align(16)) +static void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) { + __asm { + push esi + push edi + mov edi, [esp + 8 + 4] // dst_ptr + mov esi, [esp + 8 + 8] // src_ptr + mov edx, [esp + 8 + 12] // src_stride + mov ecx, [esp + 8 + 16] // dst_width + mov eax, [esp + 8 + 20] // source_y_fraction (0..255) + sub edi, esi + cmp eax, 0 + je xloop1 + cmp eax, 128 + je xloop2 + + movd xmm6, eax // xmm6 = y fraction + punpcklwd xmm6, xmm6 + pshufd xmm6, xmm6, 0 + neg eax // xmm5 = 256 - y fraction + add eax, 256 + movd xmm5, eax + punpcklwd xmm5, xmm5 + pshufd xmm5, xmm5, 0 + pxor xmm7, xmm7 + + align 16 + xloop: + movdqa xmm0, [esi] + movdqa xmm2, [esi + edx] + movdqa xmm1, xmm0 + movdqa xmm3, xmm2 + punpcklbw xmm0, xmm7 + punpcklbw xmm2, xmm7 + punpckhbw xmm1, xmm7 + punpckhbw xmm3, xmm7 + pmullw xmm0, xmm5 // scale row 0 + pmullw xmm1, xmm5 + pmullw xmm2, xmm6 // scale row 1 + pmullw xmm3, xmm6 + paddusw xmm0, xmm2 // sum rows + paddusw xmm1, xmm3 + psrlw xmm0, 8 + psrlw xmm1, 8 + packuswb xmm0, xmm1 + sub ecx, 4 + movdqa [esi + edi], xmm0 + lea esi, [esi + 16] + jg xloop + + shufps xmm0, xmm0, 0xff + movdqa [esi + edi], xmm0 // duplicate last pixel to allow horizontal filtering + pop edi + pop esi + ret + + align 16 + xloop1: + movdqa xmm0, [esi] + sub ecx, 4 + movdqa [esi + edi], xmm0 + lea esi, [esi + 16] + jg xloop1 + + shufps xmm0, xmm0, 0xff + movdqa [esi + edi], xmm0 + pop edi + pop esi + ret + + align 16 + xloop2: + movdqa xmm0, [esi] + pavgb xmm0, [esi + edx] + sub ecx, 4 + movdqa [esi + edi], xmm0 + lea esi, [esi + 16] + jg xloop2 + + shufps xmm0, xmm0, 0xff + movdqa [esi + edi], xmm0 + pop edi + pop esi + ret + } +} + +// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version. 
+#define HAS_SCALEARGBFILTERROWS_SSSE3 +__declspec(naked) __declspec(align(16)) +static void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) { + __asm { + push esi + push edi + mov edi, [esp + 8 + 4] // dst_ptr + mov esi, [esp + 8 + 8] // src_ptr + mov edx, [esp + 8 + 12] // src_stride + mov ecx, [esp + 8 + 16] // dst_width + mov eax, [esp + 8 + 20] // source_y_fraction (0..255) + sub edi, esi + shr eax, 1 + cmp eax, 0 + je xloop1 + cmp eax, 64 + je xloop2 + mov ah, al + neg al + add al, 128 + movd xmm5, eax + punpcklwd xmm5, xmm5 + pshufd xmm5, xmm5, 0 + + align 16 + xloop: + movdqa xmm0, [esi] + movdqa xmm2, [esi + edx] + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm2 + punpckhbw xmm1, xmm2 + pmaddubsw xmm0, xmm5 + pmaddubsw xmm1, xmm5 + psrlw xmm0, 7 + psrlw xmm1, 7 + packuswb xmm0, xmm1 + sub ecx, 4 + movdqa [esi + edi], xmm0 + lea esi, [esi + 16] + jg xloop + + shufps xmm0, xmm0, 0xff + movdqa [esi + edi], xmm0 // duplicate last pixel to allow horizontal filtering + pop edi + pop esi + ret + + align 16 + xloop1: + movdqa xmm0, [esi] + sub ecx, 4 + movdqa [esi + edi], xmm0 + lea esi, [esi + 16] + jg xloop1 + + shufps xmm0, xmm0, 0xff + movdqa [esi + edi], xmm0 + pop edi + pop esi + ret + + align 16 + xloop2: + movdqa xmm0, [esi] + pavgb xmm0, [esi + edx] + sub ecx, 4 + movdqa [esi + edi], xmm0 + lea esi, [esi + 16] + jg xloop2 + + shufps xmm0, xmm0, 0xff + movdqa [esi + edi], xmm0 + pop edi + pop esi + ret + } +} + +#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) + +// GCC versions of row functions are verbatim conversions from Visual C. +// Generated using gcc disassembly on Visual C object file: +// objdump -D yuvscaler.obj >yuvscaler.txt +#define HAS_SCALEARGBROWDOWN2_SSE2 +static void ScaleARGBRowDown2_SSE2(const uint8* src_ptr, int , + uint8* dst_ptr, int dst_width) { + asm volatile ( + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "sub $0x4,%2 \n" + "movdqa %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1" +#endif + ); +} + +static void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + asm volatile ( + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "movdqa (%0,%3,1),%%xmm2 \n" + "movdqa 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "sub $0x4,%2 \n" + "movdqa %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"(static_cast<intptr_t>(src_stride)) // %3 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3" +#endif +); +} + +// Bilinear row filtering combines 4x2 -> 4x1.
SSE2 version +// TODO(fbarchard): write single inline instead of 3 and use single mul of diff +#define HAS_SCALEARGBFILTERROWS_SSE2 +static void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, + const uint8* src_ptr, int src_stride, + int dst_width, int source_y_fraction) { + if (source_y_fraction == 0) { + asm volatile ( + ".p2align 4 \n" + "1:" + "movdqa (%1),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x04,%2 \n" + "jg 1b \n" + "shufps $0xff,%%xmm0,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0" +#endif + ); + return; + } else if (source_y_fraction == 128) { + asm volatile ( + ".p2align 4 \n" + "1:" + "movdqa (%1),%%xmm0 \n" + "movdqa (%1,%3,1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x04,%2 \n" + "jg 1b \n" + "shufps $0xff,%%xmm0,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width) // %2 + : "r"(static_cast<intptr_t>(src_stride)) // %3 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm2" +#endif + ); + return; + } else { + asm volatile ( + "mov %3,%%eax \n" + "movd %%eax,%%xmm6 \n" + "punpcklwd %%xmm6,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "neg %%eax \n" + "add $0x100,%%eax \n" + "movd %%eax,%%xmm5 \n" + "punpcklwd %%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pxor %%xmm7,%%xmm7 \n" + ".p2align 4 \n" + "1:" + "movdqa (%1),%%xmm0 \n" + "movdqa (%1,%4,1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklbw %%xmm7,%%xmm0 \n" + "punpcklbw %%xmm7,%%xmm2 \n" + "punpckhbw %%xmm7,%%xmm1 \n" + "punpckhbw %%xmm7,%%xmm3 \n" + "pmullw %%xmm5,%%xmm0 \n" + "pmullw %%xmm5,%%xmm1 \n" + "pmullw %%xmm6,%%xmm2 \n" + "pmullw %%xmm6,%%xmm3 \n" + "paddusw %%xmm2,%%xmm0 \n" + "paddusw %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x04,%2 \n" + "jg 1b \n" + "shufps $0xff,%%xmm0,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(source_y_fraction) // %3 + : "r"(static_cast<intptr_t>(src_stride)) // %4 + : "memory", "cc", "eax" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6", "xmm7" +#endif + ); + } + return; +} + +// Bilinear row filtering combines 4x2 -> 4x1.
SSSE3 version +#define HAS_SCALEARGBFILTERROWS_SSSE3 +static void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, + const uint8* src_ptr, int src_stride, + int dst_width, int source_y_fraction) { + if (source_y_fraction <= 1) { + asm volatile ( + ".p2align 4 \n" + "1:" + "movdqa (%1),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x04,%2 \n" + "jg 1b \n" + "shufps $0xff,%%xmm0,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0" +#endif + ); + return; + } else if (source_y_fraction == 128) { + asm volatile ( + ".p2align 4 \n" + "1:" + "movdqa (%1),%%xmm0 \n" + "movdqa (%1,%3,1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x04,%2 \n" + "jg 1b \n" + "shufps $0xff,%%xmm0,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width) // %2 + : "r"(static_cast<intptr_t>(src_stride)) // %3 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm2" +#endif + ); + return; + } else { + asm volatile ( + "mov %3,%%eax \n" + "shr %%eax \n" + "mov %%al,%%ah \n" + "neg %%al \n" + "add $0x80,%%al \n" + "movd %%eax,%%xmm5 \n" + "punpcklwd %%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + ".p2align 4 \n" + "1:" + "movdqa (%1),%%xmm0 \n" + "movdqa (%1,%4,1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "pmaddubsw %%xmm5,%%xmm0 \n" + "pmaddubsw %%xmm5,%%xmm1 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x04,%2 \n" + "jg 1b \n" + "shufps $0xff,%%xmm0,%%xmm0 \n" + "movdqa %%xmm0,(%0) \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(source_y_fraction) // %3 + : "r"(static_cast<intptr_t>(src_stride)) // %4 + : "memory", "cc", "eax" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm5" +#endif + ); + } + return; +} +#endif + +static void ScaleARGBRowDown2_C(const uint8* src_ptr, int, + uint8* dst_ptr, int dst_width) { + const uint32* src = reinterpret_cast<const uint32*>(src_ptr); + uint32* dst = reinterpret_cast<uint32*>(dst_ptr); + + for (int x = 0; x < dst_width - 1; x += 2) { + dst[0] = src[0]; + dst[1] = src[2]; + dst += 2; + src += 4; + } + if (dst_width & 1) { + dst[0] = src[0]; + } +} + +void ScaleARGBRowDown2Int_C(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) { + for (int x = 0; x < dst_width; ++x) { + dst_ptr[0] = (src_ptr[0] + src_ptr[4] + + src_ptr[src_stride] + src_ptr[src_stride + 4] + 2) >> 2; + dst_ptr[1] = (src_ptr[1] + src_ptr[5] + + src_ptr[src_stride + 1] + src_ptr[src_stride + 5] + 2) >> 2; + dst_ptr[2] = (src_ptr[2] + src_ptr[6] + + src_ptr[src_stride + 2] + src_ptr[src_stride + 6] + 2) >> 2; + dst_ptr[3] = (src_ptr[3] + src_ptr[7] + + src_ptr[src_stride + 3] + src_ptr[src_stride + 7] + 2) >> 2; + dst_ptr += 4; + src_ptr += 8; + } +} + +// (1-f)a + fb can be replaced with a + f(b-a) + +#define BLENDER1(a, b, f) (static_cast<int>(a) + \ + ((f) * (static_cast<int>(b) - static_cast<int>(a)) >> 16)) + +#define BLENDERC(a, b, f, s) static_cast<uint32>( \ + BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) + +#define BLENDER(a, b, f) \ + BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ + BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) + +static void ScaleARGBFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, + int dst_width, int x, int dx) { + const uint32* src
= reinterpret_cast<const uint32*>(src_ptr); + uint32* dst = reinterpret_cast<uint32*>(dst_ptr); + for (int j = 0; j < dst_width - 1; j += 2) { + int xi = x >> 16; + uint32 a = src[xi]; + uint32 b = src[xi + 1]; + dst[0] = BLENDER(a, b, x & 0xffff); + x += dx; + xi = x >> 16; + a = src[xi]; + b = src[xi + 1]; + dst[1] = BLENDER(a, b, x & 0xffff); + x += dx; + dst += 2; + } + if (dst_width & 1) { + int xi = x >> 16; + uint32 a = src[xi]; + uint32 b = src[xi + 1]; + dst[0] = BLENDER(a, b, x & 0xffff); + } +} + +static const int kMaxInputWidth = 2560; + +// C version 2x2 -> 2x1 +static void ScaleARGBFilterRows_C(uint8* dst_ptr, + const uint8* src_ptr, int src_stride, + int dst_width, int source_y_fraction) { + assert(dst_width > 0); + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint8* src_ptr1 = src_ptr + src_stride; + uint8* end = dst_ptr + dst_width; + do { + dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; + dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; + dst_ptr[2] = (src_ptr[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8; + dst_ptr[3] = (src_ptr[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8; + dst_ptr[4] = (src_ptr[4] * y0_fraction + src_ptr1[4] * y1_fraction) >> 8; + dst_ptr[5] = (src_ptr[5] * y0_fraction + src_ptr1[5] * y1_fraction) >> 8; + dst_ptr[6] = (src_ptr[6] * y0_fraction + src_ptr1[6] * y1_fraction) >> 8; + dst_ptr[7] = (src_ptr[7] * y0_fraction + src_ptr1[7] * y1_fraction) >> 8; + src_ptr += 8; + src_ptr1 += 8; + dst_ptr += 8; + } while (dst_ptr < end); + dst_ptr[0] = dst_ptr[-1]; +} + +/** + * ScaleARGB ARGB, 1/2 + * + * This is an optimized version for scaling down an ARGB image to 1/2 of + * its original size. + * + */ +static void ScaleARGBDown2(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_ptr, uint8* dst_ptr, + FilterMode filtering) { + assert(IS_ALIGNED(src_width, 2)); + assert(IS_ALIGNED(src_height, 2)); + void (*ScaleARGBRowDown2)(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width) = + filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C; +#if defined(HAS_SCALEARGBROWDOWN2_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(dst_width, 16) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { + ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 : ScaleARGBRowDown2_SSE2; + } +#endif + + // TODO(fbarchard): Loop through source height to allow odd height. + for (int y = 0; y < dst_height; ++y) { + ScaleARGBRowDown2(src_ptr, src_stride, dst_ptr, dst_width); + src_ptr += (src_stride << 1); + dst_ptr += dst_stride; + } +} + +/** + * ScaleARGB ARGB to/from any dimensions, with bilinear + * interpolation.
+ */ + +void ScaleARGBBilinear(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_ptr, uint8* dst_ptr) { + assert(dst_width > 0); + assert(dst_height > 0); + assert(src_width <= kMaxInputWidth); + SIMD_ALIGNED(uint8 row[kMaxInputWidth * 4 + 4]); + void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, + int dst_width, int source_y_fraction) = + ScaleARGBFilterRows_C; +#if defined(HAS_SCALEARGBFILTERROWS_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { + ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2; + } +#endif +#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { + ScaleARGBFilterRows = ScaleARGBFilterRows_SSSE3; + } +#endif + int dx = (src_width << 16) / dst_width; + int dy = (src_height << 16) / dst_height; + int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1); + int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1); + int maxy = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; + for (int j = 0; j < dst_height; ++j) { + int yi = y >> 16; + int yf = (y >> 8) & 255; + const uint8* src = src_ptr + yi * src_stride; + ScaleARGBFilterRows(row, src, src_stride, src_width, yf); + ScaleARGBFilterCols_C(dst_ptr, row, dst_width, x, dx); + dst_ptr += dst_stride; + y += dy; + if (y > maxy) { + y = maxy; + } + } +} + +// Scales a single row of pixels using point sampling. +// Code is adapted from libyuv bilinear yuv scaling, but with bilinear +// interpolation off, and argb pixels instead of yuv. +static void ScaleARGBCols(uint8* dst_ptr, const uint8* src_ptr, + int dst_width, int x, int dx) { + const uint32* src = reinterpret_cast<const uint32*>(src_ptr); + uint32* dst = reinterpret_cast<uint32*>(dst_ptr); + for (int j = 0; j < dst_width - 1; j += 2) { + dst[0] = src[x >> 16]; + x += dx; + dst[1] = src[x >> 16]; + x += dx; + dst += 2; + } + if (dst_width & 1) { + dst[0] = src[x >> 16]; + } +} + +/** + * ScaleARGB ARGB to/from any dimensions, without interpolation. + * Fixed point math is used for performance: The upper 16 bits + * of x and dx are the integer part of the source position and + * the lower 16 bits are the fixed decimal part. + */ + +static void ScaleARGBSimple(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_ptr, uint8* dst_ptr) { + int dx = (src_width << 16) / dst_width; + int dy = (src_height << 16) / dst_height; + int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1); + int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1); + for (int i = 0; i < dst_height; ++i) { + ScaleARGBCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); + dst_ptr += dst_stride; + y += dy; + } +} + +/** + * ScaleARGB ARGB to/from any dimensions. + */ +static void ScaleARGBAnySize(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_ptr, uint8* dst_ptr, + FilterMode filtering) { + if (!filtering || (src_width > kMaxInputWidth)) { + ScaleARGBSimple(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src_ptr, dst_ptr); + } else { + ScaleARGBBilinear(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src_ptr, dst_ptr); + } +} + +// ScaleARGB an ARGB image. +// +// This function in turn calls a scaling function +// suitable for handling the desired resolutions.
+ +static void ScaleARGB(const uint8* src, int src_stride, + int src_width, int src_height, + uint8* dst, int dst_stride, + int dst_width, int dst_height, + FilterMode filtering) { +#ifdef CPU_X86 + // environment variable overrides for testing. + char *filter_override = getenv("LIBYUV_FILTER"); + if (filter_override) { + filtering = (FilterMode)atoi(filter_override); // NOLINT + } +#endif + if (dst_width == src_width && dst_height == src_height) { + // Straight copy. + ARGBCopy(src, src_stride, dst, dst_stride, dst_width, dst_height); + return; + } + if (2 * dst_width == src_width && 2 * dst_height == src_height) { + // optimized 1/2. + ScaleARGBDown2(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); + return; + } + // Arbitrary scale up and/or down. + ScaleARGBAnySize(src_width, src_height, dst_width, dst_height, + src_stride, dst_stride, src, dst, filtering); +} + +// ScaleARGB an ARGB image. +int ARGBScale(const uint8* src_argb, int src_stride_argb, + int src_width, int src_height, + uint8* dst_argb, int dst_stride_argb, + int dst_width, int dst_height, + FilterMode filtering) { + if (!src_argb || src_width <= 0 || src_height == 0 || + !dst_argb || dst_width <= 0 || dst_height <= 0) { + return -1; + } + // Negative height means invert the image. + if (src_height < 0) { + src_height = -src_height; + src_argb = src_argb + (src_height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + ScaleARGB(src_argb, src_stride_argb, src_width, src_height, + dst_argb, dst_stride_argb, dst_width, dst_height, + filtering); + return 0; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc new file mode 100644 index 000000000..80edc3b6d --- /dev/null +++ b/unit_test/scale_argb_test.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> +#include <time.h> + +#include "libyuv/cpu_id.h" +#include "libyuv/scale_argb.h" +#include "unit_test/unit_test.h" + +namespace libyuv { + +static int ARGBTestFilter(int src_width, int src_height, + int dst_width, int dst_height, + FilterMode f) { + + int b = 128; + + int src_y_plane_size = (src_width + (2 * b)) * (src_height + (2 * b)) * 4; + int src_stride_y = (2 * b + src_width) * 4; + + align_buffer_16(src_y, src_y_plane_size) + + int dst_y_plane_size = (dst_width + (2 * b)) * (dst_height + (2 * b)) * 4; + int dst_stride_y = (2 * b + dst_width) * 4; + + srandom(time(NULL)); + + int i, j; + + for (i = b; i < (src_height + b); ++i) { + for (j = b; j < (src_width + b) * 4; ++j) { + src_y[(i * src_stride_y) + j] = (random() & 0xff); + } + } + + const int runs = 1000; + align_buffer_16(dst_y_c, dst_y_plane_size) + align_buffer_16(dst_y_opt, dst_y_plane_size) + + MaskCpuFlags(kCpuInitialized); + double c_time = get_time(); + + for (i = 0; i < runs; ++i) + ARGBScale(src_y + (src_stride_y * b) + b * 4, src_stride_y, + src_width, src_height, + dst_y_c + (dst_stride_y * b) + b * 4, dst_stride_y, + dst_width, dst_height, f); + + c_time = (get_time() - c_time) / runs; + + MaskCpuFlags(-1); + double opt_time = get_time(); + + for (i = 0; i < runs; ++i) + ARGBScale(src_y + (src_stride_y * b) + b * 4, src_stride_y, + src_width, src_height, + dst_y_opt + (dst_stride_y * b) + b * 4, dst_stride_y, + dst_width, dst_height, f); + + opt_time = (get_time() - opt_time) / runs; + + printf ("filter %d - %8d us c - %8d us opt\n", + f, (int)(c_time*1e6), (int)(opt_time*1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference isn't + // over 2.
+ int err = 0; + int max_diff = 0; + for (i = b; i < (dst_height + b); ++i) { + for (j = b * 4; j < (dst_width + b) * 4; ++j) { + int abs_diff = abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) + max_diff = abs_diff; + } + } + + if (max_diff > 2) + err++; + + free_aligned_buffer_16(dst_y_c) + free_aligned_buffer_16(dst_y_opt) + free_aligned_buffer_16(src_y) + return err; +} + +TEST_F(libyuvTest, ARGBScaleDownBy2) { + + const int src_width = 1280; + const int src_height = 720; + const int dst_width = src_width / 2; + const int dst_height = src_height / 2; + int err = 0; + + for (int f = 0; f < 1; ++f) { + err += ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); + } + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, ARGBScaleDownBy4) { + + const int src_width = 1280; + const int src_height = 720; + const int dst_width = src_width / 4; + const int dst_height = src_height / 4; + int err = 0; + + for (int f = 0; f < 1; ++f) { + err += ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); + } + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, ARGBScaleDownBy34) { + + const int src_width = 1280; + const int src_height = 720; + const int dst_width = src_width * 3 / 4; + const int dst_height = src_height * 3 / 4; + int err = 0; + + for (int f = 0; f < 1; ++f) { + err += ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); + } + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, ARGBScaleDownBy38) { + int src_width = 1280; + int src_height = 720; + int dst_width = src_width * 3 / 8; + int dst_height = src_height * 3 / 8; + int err = 0; + + for (int f = 0; f < 1; ++f) { + err += ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); + } + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, ARGBScalePlaneBilinear) { + int src_width = 1280; + int src_height = 720; + int dst_width = 1366; + int dst_height = 768; + int err = 0; + + for (int f = 0; f < 1; ++f) { + err += ARGBTestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); + } + + EXPECT_EQ(0, err); +} + +} // namespace libyuv diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index c3e0c6dfe..215aadfb6 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -61,7 +61,7 @@ static int TestFilter(int src_width, int src_height, } } - const int runs = 128; + const int runs = 1000; align_buffer_16(dst_y_c, dst_y_plane_size) align_buffer_16(dst_u_c, dst_uv_plane_size) align_buffer_16(dst_v_c, dst_uv_plane_size) @@ -111,7 +111,7 @@ static int TestFilter(int src_width, int src_height, for (i = b; i < (dst_height + b); ++i) { for (j = b; j < (dst_width + b); ++j) { int abs_diff = abs(dst_y_c[(i * dst_stride_y) + j] - - dst_y_opt[(i * dst_stride_y) + j]); + dst_y_opt[(i * dst_stride_y) + j]); if (abs_diff > max_diff) max_diff = abs_diff; } @@ -120,11 +120,11 @@ static int TestFilter(int src_width, int src_height, for (i = b; i < (dst_height_uv + b); ++i) { for (j = b; j < (dst_width_uv + b); ++j) { int abs_diff = abs(dst_u_c[(i * dst_stride_uv) + j] - - dst_u_opt[(i * dst_stride_uv) + j]); + dst_u_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) max_diff = abs_diff; abs_diff = abs(dst_v_c[(i * dst_stride_uv) + j] - - dst_v_opt[(i * dst_stride_uv) + j]); + dst_v_opt[(i * dst_stride_uv) + j]); if (abs_diff > max_diff) max_diff = abs_diff; @@ -157,9 +157,9 @@ TEST_F(libyuvTest, ScaleDownBy2) { int err = 0; for (int f = 0; f < 3; ++f) - err += TestFilter
(src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f)); + err += TestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); EXPECT_EQ(0, err); } @@ -173,9 +173,9 @@ TEST_F(libyuvTest, ScaleDownBy4) { int err = 0; for (int f = 0; f < 3; ++f) - err += TestFilter (src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f)); + err += TestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); EXPECT_EQ(0, err); } @@ -189,9 +189,9 @@ TEST_F(libyuvTest, ScaleDownBy34) { int err = 0; for (int f = 0; f < 3; ++f) - err += TestFilter (src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f)); + err += TestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); EXPECT_EQ(0, err); } @@ -201,13 +201,12 @@ TEST_F(libyuvTest, ScaleDownBy38) { int src_height = 720; int dst_width = src_width * 3 / 8; int dst_height = src_height * 3 / 8; - int err = 0; for (int f = 0; f < 3; ++f) - err += TestFilter (src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f)); + err += TestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); EXPECT_EQ(0, err); } @@ -217,13 +216,12 @@ TEST_F(libyuvTest, ScalePlaneBilinear) { int src_height = 720; int dst_width = 1366; int dst_height = 768; - int err = 0; for (int f = 0; f < 3; ++f) - err += TestFilter (src_width, src_height, - dst_width, dst_height, - static_cast<FilterMode>(f)); + err += TestFilter(src_width, src_height, + dst_width, dst_height, + static_cast<FilterMode>(f)); EXPECT_EQ(0, err); }
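
Appended note (not part of the patch): a minimal usage sketch of the new ARGBScale entry point declared in include/libyuv/scale_argb.h. The 1280x720 size, the packed width * 4 strides, and the HalveArgb wrapper are illustrative assumptions, not anything this change prescribes.

#include <vector>
#include "libyuv/scale_argb.h"

// Halve a 1280x720 packed ARGB image. ARGB is 4 bytes per pixel, so a
// fully packed row stride is width * 4 bytes.
int HalveArgb() {
  const int kSrcWidth = 1280, kSrcHeight = 720;
  const int kDstWidth = kSrcWidth / 2, kDstHeight = kSrcHeight / 2;
  std::vector<uint8> src(kSrcWidth * 4 * kSrcHeight, 128);  // gray pixels
  std::vector<uint8> dst(kDstWidth * 4 * kDstHeight);
  // Returns 0 on success, -1 on bad arguments. An exact 1/2 ratio in both
  // dimensions takes the ScaleARGBDown2 fast path; kFilterBilinear selects
  // its 2x2 averaging row function, kFilterNone the point-sampling one.
  return libyuv::ARGBScale(&src[0], kSrcWidth * 4, kSrcWidth, kSrcHeight,
                           &dst[0], kDstWidth * 4, kDstWidth, kDstHeight,
                           libyuv::kFilterBilinear);
}

Passing a negative src_height flips the image vertically, per the stride-inversion block at the top of ARGBScale; unaligned buffers are safe because the SSE2 dispatch falls back to the C row functions when the IS_ALIGNED checks fail.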
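
The BLENDER macros in source/scale_argb.cc lean on the identity (1-f)a + fb = a + f(b-a), which costs one multiply per channel instead of two. A standalone restatement of that arithmetic under an assumed name (BlendChannel is hypothetical, not a libyuv function):

#include <assert.h>

// f is a 16.16 fraction in [0, 65536); a and b are 8-bit channel values.
// Mirrors BLENDER1: result == (1-f)*a + f*b, truncated toward a.
static int BlendChannel(int a, int b, int f) {
  return a + ((f * (b - a)) >> 16);
}

int main() {
  assert(BlendChannel(10, 10, 12345) == 10);   // equal inputs pass through
  assert(BlendChannel(0, 255, 0) == 0);        // f == 0 returns a exactly
  assert(BlendChannel(0, 255, 65535) == 254);  // f -> 1 approaches b
  return 0;
}

BLENDERC applies this per 8-bit lane at shifts 24, 16, 8, and 0, and BLENDER ORs the four lanes back into one ARGB word.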
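
The SSSE3 ScaleARGBFilterRows kernels halve source_y_fraction to 0..127, pack the byte pair (128 - f, f) into xmm5, and let pmaddubsw compute row0*(128 - f) + row1*f per byte lane before psrlw 7 divides by 128. A scalar C model of one lane (illustrative; BlendRowsModel is a made-up name, and this ignores the SIMD interleave done by punpcklbw/punpckhbw):

// p0/p1 are vertically adjacent bytes from row 0 and row 1.
static unsigned char BlendRowsModel(unsigned char p0, unsigned char p1,
                                    int source_y_fraction /* 0..255 */) {
  int f = source_y_fraction >> 1;  // shr eax, 1: 7-bit fraction, 0..127
  // mov ah, al / neg al / add al, 128 builds the (128 - f, f) multiplier.
  return (unsigned char)((p0 * (128 - f) + p1 * f) >> 7);
}

Dropping the low fraction bit is what makes the SSSE3 path slightly cheaper than the SSE2 pmullw/paddusw sequence, at the cost of one bit of vertical precision, which is why the test harness tolerates a max difference of 2 between the C and optimized outputs.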
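
ScaleARGBSimple and ScaleARGBBilinear seed their 16.16 accumulators with (dx >> 1) - 32768 whenever dx >= 65536, which centers each sample on its source span when scaling down. A short walk-through of that stepping (illustrative, not code from the patch):

#include <stdio.h>

int main() {
  const int src_width = 4, dst_width = 2;
  int dx = (src_width << 16) / dst_width;  // 131072, i.e. 2.0 in 16.16
  // Downscaling (dx >= 65536): start at dx/2 minus half a source pixel so
  // each destination pixel samples the center of the 2-pixel span it covers.
  int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
  for (int i = 0; i < dst_width; ++i, x += dx) {
    printf("dst %d <- src %.2f\n", i, x / 65536.0);  // prints 0.50, 2.50
  }
  return 0;
}

Upscaling skips the half-pixel pullback (x starts at dx >> 1) so the first sample can never index before the row, and the y accumulator is additionally clamped to maxy so the yi + 1 row read by the row filter stays in bounds.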