From 2657688e701709a5af935e6ea27f4f8967208f2d Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Mon, 7 Dec 2015 12:03:20 -0800 Subject: [PATCH] Add support for odd height YUVA alpha blending. R=dhrosa@google.com, harryjin@google.com BUG=libyuv:527 Review URL: https://codereview.chromium.org/1507683003 . --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/planar_functions.cc | 30 ++++++++++++++++-------------- unit_test/planar_test.cc | 17 ++++++++++++++++- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/README.chromium b/README.chromium index b0bc90214..a18603599 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1548 +Version: 1549 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 2c47a4c9f..649c7e241 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1548 +#define LIBYUV_VERSION 1549 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b7de63b4a..459df6fe0 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -651,6 +651,8 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, uint8* dst_v, int dst_stride_v, int width, int height) { int y; + // Half width/height for UV. + int halfwidth = (width + 1) >> 1; void (*BlendPlaneRow)(const uint8* src0, const uint8* src1, const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C; void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, @@ -674,15 +676,11 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, dst_y, dst_stride_y, width, height); - // Half width/height for UV. 
- width = (width + 1) >> 1; - height = (height + 1) >> 1; - #if defined(HAS_BLENDPLANEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { // TODO(fbarchard): Implement any versions for odd width. // BlendPlaneRow = BlendPlaneRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(halfwidth, 8)) { BlendPlaneRow = BlendPlaneRow_SSSE3; } } @@ -690,7 +688,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, #if defined(HAS_BLENDPLANEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { // BlendPlaneRow = BlendPlaneRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(halfwidth, 16)) { BlendPlaneRow = BlendPlaneRow_AVX2; } } @@ -698,7 +696,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleRowDown2 = ScaleRowDown2Box_Any_NEON; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(halfwidth, 16)) { ScaleRowDown2 = ScaleRowDown2Box_NEON; } } @@ -706,7 +704,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, #if defined(HAS_SCALEROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(halfwidth, 16)) { ScaleRowDown2 = ScaleRowDown2Box_SSE2; } } @@ -714,20 +712,24 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, #if defined(HAS_SCALEROWDOWN2_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(halfwidth, 32)) { ScaleRowDown2 = ScaleRowDown2Box_AVX2; } } #endif // Row buffer for intermediate alpha pixels. - align_buffer_64(halfalpha, width); - for (y = 0; y < height; ++y) { + align_buffer_64(halfalpha, halfwidth); + for (y = 0; y < height; y += 2) { + // Last row of an odd height image uses 1 row of alpha instead of 2. + if (y == (height - 1)) { + alpha_stride = 0; + } // Subsample 2 rows of UV to half width and half height. 
- ScaleRowDown2(alpha, alpha_stride, halfalpha, width); + ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth); alpha += alpha_stride * 2; - BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width); - BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width); + BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth); + BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth); src_u0 += src_stride_u0; src_u1 += src_stride_u1; dst_u += dst_stride_u; diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index f5a8b2129..271e0cc5e 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1164,7 +1164,6 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { } #ifdef HAS_BLENDPLANEROW_AVX2 -// TODO(fbarchard): Switch to I420Blend. static void TestBlendPlaneRow(int width, int height, int benchmark_iterations, int invert, int off) { int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); @@ -1348,6 +1347,14 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); } +TEST_F(LibYUVPlanarTest, BlendPlane_Any) { + TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); +} +TEST_F(LibYUVPlanarTest, BlendPlane_Invert) { + TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 1); +} #define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) @@ -1442,6 +1449,14 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); } +TEST_F(LibYUVPlanarTest, I420Blend_Any) { + TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); +} +TEST_F(LibYUVPlanarTest, I420Blend_Invert) { + TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, 
+ disable_cpu_flags_, benchmark_cpu_info_, -1, 0); +} TEST_F(LibYUVPlanarTest, TestAffine) { SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);