From 0d95d4775792c26117d803adbab0bef1cee18052 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 12 Jul 2012 03:28:09 +0000 Subject: [PATCH] ARGBInterpolate for blending 2 images with a fractional amount of each. BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/670008 git-svn-id: http://libyuv.googlecode.com/svn/trunk@300 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/planar_functions.h | 10 +++++ include/libyuv/version.h | 2 +- source/planar_functions.cc | 59 +++++++++++++++++++++++- source/scale_argb.cc | 29 ++++++------ unit_test/planar_test.cc | 74 ++++++++++++++++++++++++++++++- 6 files changed, 156 insertions(+), 20 deletions(-) diff --git a/README.chromium b/README.chromium index 57582730e..fbb55b20f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 299 +Version: 300 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index fe40f3a7f..7ee27c191 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -212,6 +212,16 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height, uint32 value); +// Interpolate between two ARGB images using specified amount of interpolation +// (0 to 255) and store to destination. +// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0 +// and 255 means 1% src_argb0 and 99% src_argb1. +// Internally uses ARGBScale bilinear filtering. +int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, + const uint8* src_argb1, int src_stride_argb1, + uint8* dst_argb, int dst_stride_argb, + int width, int height, int interpolation); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8aabbb247..e47fa36a9 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 299 +#define LIBYUV_VERSION 300 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 17e191231..0daa4d14b 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1082,7 +1082,7 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, return 0; } -// Multiply ARGB image by ARGB value. +// Multiply ARGB image by a specified ARGB value. int ARGBShade(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height, uint32 value) { @@ -1112,6 +1112,63 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, return 0; } +#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \ + (defined(__x86_64__) || defined(__i386__))) +#define HAS_SCALEARGBFILTERROWS_SSE2 +#define HAS_SCALEARGBFILTERROWS_SSSE3 +#endif +void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride, + int dst_width, int source_y_fraction); +void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction); +void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction); + +// Interpolate 2 ARGB images by specified amount (0 to 255). +int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, + const uint8* src_argb1, int src_stride_argb1, + uint8* dst_argb, int dst_stride_argb, + int width, int height, int interpolation) { + if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) = ScaleARGBFilterRows_C; +#if defined(HAS_SCALEARGBFILTERROWS_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && + IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2; + } +#endif +#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && + IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ScaleARGBFilterRows = ScaleARGBFilterRows_SSSE3; + } +#endif + for (int y = 0; y < height; ++y) { + ScaleARGBFilterRows(dst_argb, src_argb0, src_argb1 - src_argb0, + width, interpolation); + src_argb0 += src_stride_argb0; + src_argb1 += src_stride_argb1; + dst_argb += dst_stride_argb; + } + return 0; +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 616ab9c1f..7931c71e9 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -184,9 +184,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride, // Bilinear row filtering combines 4x2 -> 4x1. SSE2 version. #define HAS_SCALEARGBFILTERROWS_SSE2 __declspec(naked) __declspec(align(16)) -static void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int src_stride, int dst_width, - int source_y_fraction) { +void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) { __asm { push esi push edi @@ -271,9 +271,9 @@ static void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version. #define HAS_SCALEARGBFILTERROWS_SSSE3 __declspec(naked) __declspec(align(16)) -static void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int src_stride, int dst_width, - int source_y_fraction) { +void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) { __asm { push esi push edi @@ -499,9 +499,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride, // Bilinear row filtering combines 4x2 -> 4x1. SSE2 version #define HAS_SCALEARGBFILTERROWS_SSE2 -static void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, - const uint8* src_ptr, int src_stride, - int dst_width, int source_y_fraction) { +void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) { asm volatile ( "sub %1,%0 \n" "cmp $0x0,%3 \n" @@ -570,9 +570,9 @@ static void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version #define HAS_SCALEARGBFILTERROWS_SSSE3 -static void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, - const uint8* src_ptr, int src_stride, - int dst_width, int source_y_fraction) { +void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, + int src_stride, int dst_width, + int source_y_fraction) { asm volatile ( "sub %1,%0 \n" "shr %3 \n" @@ -743,9 +743,8 @@ static void ScaleARGBFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, static const int kMaxInputWidth = 2560; // C version 2x2 -> 2x1 -static void ScaleARGBFilterRows_C(uint8* dst_ptr, - const uint8* src_ptr, int src_stride, - int dst_width, int source_y_fraction) { +void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride, + int dst_width, int source_y_fraction) { assert(dst_width > 0); int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 384c3e0be..03e63d4f5 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -652,12 +652,10 @@ TEST_F(libyuvTest, TestShade) { SIMD_ALIGNED(uint8 orig_pixels[256][4]); SIMD_ALIGNED(uint8 shade_pixels[256][4]); - // Test unattenuation clamps orig_pixels[0][0] = 10u; orig_pixels[0][1] = 20u; orig_pixels[0][2] = 40u; orig_pixels[0][3] = 80u; - // Test unattenuation transparent and opaque are unaffected orig_pixels[1][0] = 0u; orig_pixels[1][1] = 0u; orig_pixels[1][2] = 0u; @@ -700,4 +698,76 @@ TEST_F(libyuvTest, TestShade) { } } +TEST_F(libyuvTest, TestInterpolate) { + SIMD_ALIGNED(uint8 orig_pixels_0[256][4]); + SIMD_ALIGNED(uint8 orig_pixels_1[256][4]); + SIMD_ALIGNED(uint8 interpolate_pixels[256][4]); + + orig_pixels_0[0][0] = 10u; + orig_pixels_0[0][1] = 20u; + orig_pixels_0[0][2] = 40u; + orig_pixels_0[0][3] = 80u; + orig_pixels_0[1][0] = 0u; + orig_pixels_0[1][1] = 0u; + orig_pixels_0[1][2] = 0u; + orig_pixels_0[1][3] = 255u; + orig_pixels_0[2][0] = 0u; + orig_pixels_0[2][1] = 0u; + orig_pixels_0[2][2] = 0u; + orig_pixels_0[2][3] = 0u; + orig_pixels_0[3][0] = 0u; + orig_pixels_0[3][1] = 0u; + orig_pixels_0[3][2] = 0u; + orig_pixels_0[3][3] = 0u; + + orig_pixels_1[0][0] = 0u; + orig_pixels_1[0][1] = 0u; + orig_pixels_1[0][2] = 0u; + orig_pixels_1[0][3] = 0u; + orig_pixels_1[1][0] = 0u; + orig_pixels_1[1][1] = 0u; + orig_pixels_1[1][2] = 0u; + orig_pixels_1[1][3] = 0u; + orig_pixels_1[2][0] = 0u; + orig_pixels_1[2][1] = 0u; + orig_pixels_1[2][2] = 0u; + orig_pixels_1[2][3] = 0u; + orig_pixels_1[3][0] = 255u; + orig_pixels_1[3][1] = 255u; + orig_pixels_1[3][2] = 255u; + orig_pixels_1[3][3] = 255u; + + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 4, 1, 128); + EXPECT_EQ(5u, interpolate_pixels[0][0]); + EXPECT_EQ(10u, interpolate_pixels[0][1]); + EXPECT_EQ(20u, interpolate_pixels[0][2]); + EXPECT_EQ(40u, interpolate_pixels[0][3]); + EXPECT_EQ(0u, interpolate_pixels[1][0]); + EXPECT_EQ(0u, interpolate_pixels[1][1]); + EXPECT_EQ(0u, interpolate_pixels[1][2]); + EXPECT_NEAR(128u, interpolate_pixels[1][3], 1); // C = 127, SSE = 128. + EXPECT_EQ(0u, interpolate_pixels[2][0]); + EXPECT_EQ(0u, interpolate_pixels[2][1]); + EXPECT_EQ(0u, interpolate_pixels[2][2]); + EXPECT_EQ(0u, interpolate_pixels[2][3]); + EXPECT_NEAR(128u, interpolate_pixels[3][0], 1); + EXPECT_NEAR(128u, interpolate_pixels[3][1], 1); + EXPECT_NEAR(128u, interpolate_pixels[3][2], 1); + EXPECT_NEAR(128u, interpolate_pixels[3][3], 1); + + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 4, 1, 0); + + EXPECT_EQ(10u, interpolate_pixels[0][0]); + EXPECT_EQ(20u, interpolate_pixels[0][1]); + EXPECT_EQ(40u, interpolate_pixels[0][2]); + EXPECT_EQ(80u, interpolate_pixels[0][3]); + + for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) { + ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, + &interpolate_pixels[0][0], 0, 256, 1, 128); + } +} + } // namespace libyuv