diff --git a/README.chromium b/README.chromium index 440dac6a1..7858de66b 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 282 +Version: 283 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cba66fc71..b5043f59e 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 282 +#define LIBYUV_VERSION 283 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 6383552eb..bdd1dfed4 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1709,26 +1709,50 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int32* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius) { + void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, + int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft, int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C; #if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { + ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; CumulativeSumToAverage = CumulativeSumToAverage_SSE2; } #endif - ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum, dst_stride32_cumsum, - width, height); + width, radius); + + src_argb = src_argb + radius * src_stride_argb; + int32* cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; + + int32* max_cumsum_bot_row = + &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; + int32* cumsum_top_row = &dst_cumsum[0]; for (int y = 0; y < height; ++y) { int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); - int32* cumsum_top_row = &dst_cumsum[top_y * dst_stride32_cumsum]; - int32* cumsum_bot_row = &dst_cumsum[bot_y * dst_stride32_cumsum]; + int area = radius * (bot_y - top_y); + + if (top_y) { + cumsum_top_row += dst_stride32_cumsum; + if (cumsum_top_row >= max_cumsum_bot_row) { + cumsum_top_row = dst_cumsum; + } + } + if ((y + radius) < height) { + int32* prev_cumsum_bot_row = cumsum_bot_row; + cumsum_bot_row += dst_stride32_cumsum; + if (cumsum_bot_row >= max_cumsum_bot_row) { + cumsum_bot_row = dst_cumsum; + } + ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, + width); + src_argb += src_stride_argb; + } // Left clipped. - int area = radius * (bot_y - top_y); int boxwidth = radius * 4; int x; for (x = 0; x < radius + 1; ++x) { diff --git a/source/row_posix.cc b/source/row_posix.cc index a1d499abc..0fdb0923b 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2989,8 +2989,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, "10: \n" "movd (%0),%%xmm2 \n" "lea 0x4(%0),%0 \n" - "punpcklbw %%xmm4,%%xmm2 \n" - "punpcklwd %%xmm4,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm1,%%xmm2 \n" "paddd %%xmm2,%%xmm0 \n" "movdqu (%1,%2,1),%%xmm2 \n" "paddd %%xmm0,%%xmm2 \n" diff --git a/source/row_win.cc b/source/row_win.cc index 2fd6f5c02..6d8bcecca 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3187,8 +3187,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, l1: movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. lea eax, [eax + 4] - punpcklbw xmm2, xmm4 - punpcklwd xmm2, xmm4 + punpcklbw xmm2, xmm1 + punpcklwd xmm2, xmm1 paddd xmm0, xmm2 movdqu xmm2, [edx + esi] paddd xmm2, xmm0