mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
Blur: use a circular buffer of cumulative sums instead of a full-frame buffer, for better cache coherency.
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/646008 git-svn-id: http://libyuv.googlecode.com/svn/trunk@283 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f51e87912e
commit
f38aefef4b
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 282
|
||||
Version: 283
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 282
|
||||
#define LIBYUV_VERSION 283
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
|
||||
@ -1709,26 +1709,50 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int32* dst_cumsum, int dst_stride32_cumsum,
|
||||
int width, int height, int radius) {
|
||||
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
|
||||
int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
|
||||
void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
|
||||
int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C;
|
||||
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
|
||||
CumulativeSumToAverage = CumulativeSumToAverage_SSE2;
|
||||
}
|
||||
#endif
|
||||
|
||||
ARGBComputeCumulativeSum(src_argb, src_stride_argb,
|
||||
dst_cumsum, dst_stride32_cumsum,
|
||||
width, height);
|
||||
width, radius);
|
||||
|
||||
src_argb = src_argb + radius * src_stride_argb;
|
||||
int32* cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
|
||||
|
||||
int32* max_cumsum_bot_row =
|
||||
&dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
|
||||
int32* cumsum_top_row = &dst_cumsum[0];
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
|
||||
int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
|
||||
int32* cumsum_top_row = &dst_cumsum[top_y * dst_stride32_cumsum];
|
||||
int32* cumsum_bot_row = &dst_cumsum[bot_y * dst_stride32_cumsum];
|
||||
int area = radius * (bot_y - top_y);
|
||||
|
||||
if (top_y) {
|
||||
cumsum_top_row += dst_stride32_cumsum;
|
||||
if (cumsum_top_row >= max_cumsum_bot_row) {
|
||||
cumsum_top_row = dst_cumsum;
|
||||
}
|
||||
}
|
||||
if ((y + radius) < height) {
|
||||
int32* prev_cumsum_bot_row = cumsum_bot_row;
|
||||
cumsum_bot_row += dst_stride32_cumsum;
|
||||
if (cumsum_bot_row >= max_cumsum_bot_row) {
|
||||
cumsum_bot_row = dst_cumsum;
|
||||
}
|
||||
ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
|
||||
width);
|
||||
src_argb += src_stride_argb;
|
||||
}
|
||||
|
||||
// Left clipped.
|
||||
int area = radius * (bot_y - top_y);
|
||||
int boxwidth = radius * 4;
|
||||
int x;
|
||||
for (x = 0; x < radius + 1; ++x) {
|
||||
|
||||
@ -2989,8 +2989,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
|
||||
"10: \n"
|
||||
"movd (%0),%%xmm2 \n"
|
||||
"lea 0x4(%0),%0 \n"
|
||||
"punpcklbw %%xmm4,%%xmm2 \n"
|
||||
"punpcklwd %%xmm4,%%xmm2 \n"
|
||||
"punpcklbw %%xmm1,%%xmm2 \n"
|
||||
"punpcklwd %%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm0 \n"
|
||||
"movdqu (%1,%2,1),%%xmm2 \n"
|
||||
"paddd %%xmm0,%%xmm2 \n"
|
||||
|
||||
@ -3187,8 +3187,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
|
||||
l1:
|
||||
movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
|
||||
lea eax, [eax + 4]
|
||||
punpcklbw xmm2, xmm4
|
||||
punpcklwd xmm2, xmm4
|
||||
punpcklbw xmm2, xmm1
|
||||
punpcklwd xmm2, xmm1
|
||||
paddd xmm0, xmm2
|
||||
movdqu xmm2, [edx + esi]
|
||||
paddd xmm2, xmm0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user