From a2ea90567998b1ab93ce7fe3acc25922862e4c9c Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Tue, 8 Dec 2015 18:59:48 -0800
Subject: [PATCH] BlendPlane any width.

Benchmark
out\release\libyuv_unittest --libyuv_width=1279 --libyuv_height=719 --libyuv_repeat=999 --libyuv_flags=-1 --gtest_filter=*Blend* | sortms

Was
I420Blend_Any (2321 ms)
I420Blend_Unaligned (1684 ms)
I420Blend_Opt (1675 ms)
I420Blend_Invert (1653 ms)
BlendPlane_Invert (1556 ms)
BlendPlane_Any (1552 ms)
BlendPlane_Unaligned (1548 ms)
BlendPlane_Opt (1535 ms)
ARGBBlend_Unaligned (659 ms)
ARGBBlend_Any (596 ms)
ARGBBlend_Invert (591 ms)
ARGBBlend_Opt (508 ms)
BlendPlaneRow_Unaligned (186 ms)
BlendPlaneRow_Opt (171 ms)

Now
ARGBBlend_Any (621 ms)
ARGBBlend_Unaligned (585 ms)
ARGBBlend_Invert (564 ms)
ARGBBlend_Opt (512 ms)
I420Blend_Unaligned (347 ms)
I420Blend_Invert (345 ms)
I420Blend_Any (337 ms)
I420Blend_Opt (327 ms)
BlendPlane_Unaligned (187 ms)
BlendPlaneRow_Unaligned (187 ms)
BlendPlane_Invert (186 ms)
BlendPlane_Any (186 ms)
BlendPlaneRow_Opt (173 ms)
BlendPlane_Opt (171 ms)

which is comparable to aligned case
out\release\libyuv_unittest --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1 --gtest_filter=*Blend* | sortms
ARGBBlend_Any (625 ms)
ARGBBlend_Unaligned (602 ms)
ARGBBlend_Invert (508 ms)
ARGBBlend_Opt (506 ms)
I420Blend_Any (353 ms)
I420Blend_Unaligned (322 ms)
I420Blend_Invert (304 ms)
I420Blend_Opt (301 ms)
BlendPlaneRow_Unaligned (188 ms)
BlendPlane_Unaligned (186 ms)
BlendPlane_Invert (185 ms)
BlendPlane_Any (184 ms)
BlendPlaneRow_Opt (173 ms)
BlendPlane_Opt (169 ms)

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:527

Review URL: https://codereview.chromium.org/1513443002 .
--- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/planar_functions.cc | 10 ++++------ source/row_any.cc | 6 ++++++ 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.chromium b/README.chromium index 811ec9b52..e76949316 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1550 +Version: 1551 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 57e0c0a05..93cbb7868 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1550 +#define LIBYUV_VERSION 1551 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index c73fa8ab4..2731bd0da 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -610,8 +610,7 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0, #if defined(HAS_BLENDPLANEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { -// TODO(fbarchard): Implement any versions for odd width. -// BlendPlaneRow = BlendPlaneRow_Any_SSSE3; + BlendPlaneRow = BlendPlaneRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { BlendPlaneRow = BlendPlaneRow_SSSE3; } @@ -619,7 +618,7 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0, #endif #if defined(HAS_BLENDPLANEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { -// BlendPlaneRow = BlendPlaneRow_Any_AVX2; + BlendPlaneRow = BlendPlaneRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { BlendPlaneRow = BlendPlaneRow_AVX2; } @@ -678,8 +677,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, #if defined(HAS_BLENDPLANEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { -// TODO(fbarchard): Implement any versions for odd width. 
-// BlendPlaneRow = BlendPlaneRow_Any_SSSE3; + BlendPlaneRow = BlendPlaneRow_Any_SSSE3; if (IS_ALIGNED(halfwidth, 8)) { BlendPlaneRow = BlendPlaneRow_SSSE3; } @@ -687,7 +685,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, #endif #if defined(HAS_BLENDPLANEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { -// BlendPlaneRow = BlendPlaneRow_Any_AVX2; + BlendPlaneRow = BlendPlaneRow_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { BlendPlaneRow = BlendPlaneRow_AVX2; } diff --git a/source/row_any.cc b/source/row_any.cc index 511ca1cc8..fef7ecd9a 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -83,6 +83,12 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #ifdef HAS_I422TOUYVYROW_NEON ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #endif +#ifdef HAS_BLENDPLANEROW_AVX2 +ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) +#endif +#ifdef HAS_BLENDPLANEROW_SSSE3 +ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) +#endif #undef ANY31 // Note that odd width replication includes 444 due to implementation