mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
AddRow function to help implement box filter
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/527002 git-svn-id: http://libyuv.googlecode.com/svn/trunk@252 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
5ff3a8fec5
commit
5566302866
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 251
|
||||
Version: 252
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -170,6 +170,11 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Get function to add or subtract rows of bytes to a 16 bit buffer. For blur.
// AddRow is the row function type: it reads bytes from src and updates the
// 16 bit values at dst, one dst element per src byte, for width elements.
|
||||
typedef void (*AddRow)(const uint8* src, uint16* dst, int width);
|
||||
// Returns the row function that adds src bytes into dst. The dst pointer and
// width passed here are only used to choose an implementation.
AddRow GetAddRow(uint16* dst, int width);
|
||||
// Returns the row function that subtracts src bytes from dst. The dst pointer
// and width passed here are only used to choose an implementation.
AddRow GetSubRow(uint16* dst, int width);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
||||
@ -66,6 +66,14 @@ int ScaleOffset(const uint8* src, int src_width, int src_height,
|
||||
uint8* dst, int dst_width, int dst_height, int dst_yoffset,
|
||||
bool interpolate);
|
||||
|
||||
// Row function type returned by GetARGBBlend. Takes two source ARGB rows and
// one destination ARGB row.
// NOTE(review): width is presumably a pixel count and which source acts as
// the foreground is not visible here - confirm against the implementation.
typedef void (*ARGBBlendRow)(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb, int width);
|
||||
|
||||
// Get function to Alpha Blend ARGB pixels and store to destination.
// NOTE(review): dst_argb, dst_stride_argb and width are presumably used only
// to select an implementation - confirm against the definition.
|
||||
ARGBBlendRow GetARGBBlend(uint8* dst_argb, int dst_stride_argb, int width);
|
||||
|
||||
|
||||
// For testing, allow disabling of optimizations.
|
||||
void SetUseReferenceImpl(bool use);
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 251
|
||||
#define LIBYUV_VERSION 252
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
|
||||
@ -944,6 +944,32 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// AddRow is useful for summing up rows of an image, when implementing a
|
||||
// box filter or blur effect.
|
||||
AddRow GetAddRow(uint16* dst, int width) {
|
||||
AddRow AddRowF = AddRow_C;
|
||||
#if defined(HAS_ADDROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(dst, 16) && IS_ALIGNED(width, 16)) {
|
||||
AddRowF = AddRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
return AddRowF;
|
||||
}
|
||||
|
||||
// SubRow is useful when a sum of rows exists and the caller wants to
|
||||
// remove a row and add a new row without recomputing the full sum of rows.
|
||||
AddRow GetSubRow(uint16* dst, int width) {
|
||||
AddRow SubRowF = SubRow_C;
|
||||
#if defined(HAS_ADDROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(dst, 16) && IS_ALIGNED(width, 16)) {
|
||||
SubRowF = SubRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
return SubRowF;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
||||
@ -61,6 +61,7 @@ extern "C" {
|
||||
#define HAS_I444TOARGBROW_SSSE3
|
||||
#define HAS_MIRRORROW_SSSE3
|
||||
#define HAS_MIRRORROWUV_SSSE3
|
||||
#define HAS_ADDROW_SSE2
|
||||
#define HAS_RAWTOARGBROW_SSSE3
|
||||
#define HAS_RGB24TOARGBROW_SSSE3
|
||||
#define HAS_RGB565TOARGBROW_SSE2
|
||||
@ -152,6 +153,11 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
|
||||
void MirrorRowUV_NEON(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
|
||||
void MirrorRowUV_C(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
// Row functions that add (AddRow) or subtract (SubRow) width source bytes
// to/from the 16 bit values at dst. The SSE2 versions expect dst and width
// to be aligned to 16; the C versions have no alignment requirement.
void AddRow_SSE2(const uint8* src, uint16* dst, int width);
|
||||
void SubRow_SSE2(const uint8* src, uint16* dst, int width);
|
||||
void AddRow_C(const uint8* src, uint16* dst, int width);
|
||||
void SubRow_C(const uint8* src, uint16* dst, int width);
|
||||
|
||||
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
|
||||
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
|
||||
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
|
||||
|
||||
@ -386,6 +386,30 @@ void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
void AddRow_C(const uint8* src, uint16* dst, int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
dst[0] += static_cast<uint16>(src[0]);
|
||||
dst[1] += static_cast<uint16>(src[1]);
|
||||
src += 2;
|
||||
dst += 2;
|
||||
}
|
||||
if (width & 1) {
|
||||
dst[0] += static_cast<uint16>(src[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void SubRow_C(const uint8* src, uint16* dst, int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
dst[0] -= static_cast<uint16>(src[0]);
|
||||
dst[1] -= static_cast<uint16>(src[1]);
|
||||
src += 2;
|
||||
dst += 2;
|
||||
}
|
||||
if (width & 1) {
|
||||
dst[0] -= static_cast<uint16>(src[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
dst_u[x] = src_uv[0];
|
||||
|
||||
@ -1690,6 +1690,68 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ADDROW_SSE2
|
||||
// Adds 16 source bytes per loop pass to 16 words of the dst accumulator.
// dst and width aligned to 16. src may be unaligned (loaded with movdqu).
// The loop counter is tested after the body, so at least 16 elements are
// always processed.
|
||||
void AddRow_SSE2(const uint8* src, uint16* dst, int width) {
|
||||
asm volatile (
|
||||
// xmm4 = 0, used as the high half when widening bytes to words.
"pxor %%xmm4,%%xmm4 \n"
|
||||
"1: \n"
|
||||
// Load 16 source bytes (unaligned) and advance src by 16.
"movdqu (%0),%%xmm2 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
// Load 16 accumulator words (aligned) into xmm0 and xmm1.
"movdqa (%1),%%xmm0 \n"
|
||||
"movdqa 0x10(%1),%%xmm1 \n"
|
||||
// Zero extend the low 8 bytes into xmm2 and the high 8 bytes into xmm3.
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklbw %%xmm4,%%xmm2 \n"
|
||||
"punpckhbw %%xmm4,%%xmm3 \n"
|
||||
// Unsigned saturating add of the widened bytes to the accumulators.
"paddusw %%xmm2,%%xmm0 \n"
|
||||
"paddusw %%xmm3,%%xmm1 \n"
|
||||
// Count down by 16 elements. The flags set here are consumed by the jg
// below; the movdqa and lea in between do not modify flags.
"sub $0x10,%2 \n"
|
||||
// Store the 16 updated words (aligned) and advance dst by 32 bytes.
"movdqa %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm1,0x10(%1) \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
// Loop while the remaining width is greater than zero.
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
// Subtracts 16 source bytes per loop pass from 16 words of the dst
// accumulator.
// dst and width aligned to 16. src may be unaligned (loaded with movdqu).
// The loop counter is tested after the body, so at least 16 elements are
// always processed.
|
||||
void SubRow_SSE2(const uint8* src, uint16* dst, int width) {
|
||||
asm volatile (
|
||||
// xmm4 = 0, used as the high half when widening bytes to words.
"pxor %%xmm4,%%xmm4 \n"
|
||||
"1: \n"
|
||||
// Load 16 source bytes (unaligned) and advance src by 16.
"movdqu (%0),%%xmm2 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
// Load 16 accumulator words (aligned) into xmm0 and xmm1.
"movdqa (%1),%%xmm0 \n"
|
||||
"movdqa 0x10(%1),%%xmm1 \n"
|
||||
// Zero extend the low 8 bytes into xmm2 and the high 8 bytes into xmm3.
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklbw %%xmm4,%%xmm2 \n"
|
||||
"punpckhbw %%xmm4,%%xmm3 \n"
|
||||
// Unsigned saturating subtract of the widened bytes from the accumulators.
"psubusw %%xmm2,%%xmm0 \n"
|
||||
"psubusw %%xmm3,%%xmm1 \n"
|
||||
// Count down by 16 elements. The flags set here are consumed by the jg
// below; the movdqa and lea in between do not modify flags.
"sub $0x10,%2 \n"
|
||||
// Store the 16 updated words (aligned) and advance dst by 32 bytes.
"movdqa %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm1,0x10(%1) \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
// Loop while the remaining width is greater than zero.
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_ADDROW_SSE2
|
||||
|
||||
#ifdef HAS_SPLITUV_SSE2
|
||||
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
|
||||
@ -1716,6 +1716,65 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ADDROW_SSE2
|
||||
// Adds 16 source bytes per loop pass to 16 words of the dst accumulator.
// dst and width aligned to 16. src may be unaligned (loaded with movdqu).
// Naked function: arguments are read directly from the stack and the
// function returns with an explicit ret.
// The loop counter is tested after the body, so at least 16 elements are
// always processed.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void AddRow_SSE2(const uint8* src, uint16* dst, int width) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov edx, [esp + 8] // dst
|
||||
mov ecx, [esp + 12] // width
|
||||
pxor xmm4, xmm4 // zero, used to widen bytes to words
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
movdqu xmm2, [eax] // read 16 bytes
|
||||
lea eax, [eax + 16] // advance src by 16 bytes
|
||||
movdqa xmm0, [edx] // read first 8 words
|
||||
movdqa xmm1, [edx + 16] // read next 8 words
|
||||
movdqa xmm3, xmm2 // copy so low and high halves can be widened
|
||||
punpcklbw xmm2, xmm4 // zero extend low 8 bytes to words
|
||||
punpckhbw xmm3, xmm4 // zero extend high 8 bytes to words
|
||||
paddusw xmm0, xmm2 // add 16 words
|
||||
paddusw xmm1, xmm3 // (unsigned saturating add)
|
||||
sub ecx, 16 // 16 elements done; flags are used by jg below
|
||||
movdqa [edx], xmm0 // store 16 words
|
||||
movdqa [edx + 16], xmm1
|
||||
lea edx, [edx + 32] // advance dst by 32 bytes; lea keeps the flags
|
||||
jg convertloop // loop while remaining width > 0
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Subtracts 16 source bytes per loop pass from 16 words of the dst
// accumulator. dst is accessed with aligned loads and stores (movdqa);
// src may be unaligned (loaded with movdqu).
// Naked function: arguments are read directly from the stack and the
// function returns with an explicit ret.
// The loop counter is tested after the body, so at least 16 elements are
// always processed.
__declspec(naked) __declspec(align(16))
|
||||
void SubRow_SSE2(const uint8* src, uint16* dst, int width) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov edx, [esp + 8] // dst
|
||||
mov ecx, [esp + 12] // width
|
||||
pxor xmm4, xmm4 // zero, used to widen bytes to words
|
||||
|
||||
align 16
|
||||
convertloop:
|
||||
movdqu xmm2, [eax] // read 16 bytes
|
||||
lea eax, [eax + 16] // advance src by 16 bytes
|
||||
movdqa xmm0, [edx] // read first 8 words
|
||||
movdqa xmm1, [edx + 16] // read next 8 words
|
||||
movdqa xmm3, xmm2 // copy so low and high halves can be widened
|
||||
punpcklbw xmm2, xmm4 // zero extend low 8 bytes to words
|
||||
punpckhbw xmm3, xmm4 // zero extend high 8 bytes to words
|
||||
psubusw xmm0, xmm2 // sub 16 words
|
||||
psubusw xmm1, xmm3 // (unsigned saturating subtract)
|
||||
sub ecx, 16 // 16 elements done; flags are used by jg below
|
||||
movdqa [edx], xmm0 // store 16 words
|
||||
movdqa [edx + 16], xmm1
|
||||
lea edx, [edx + 32] // advance dst by 32 bytes; lea keeps the flags
|
||||
jg convertloop // loop while remaining width > 0
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_ADDROW_SSE2
|
||||
|
||||
#ifdef HAS_SPLITUV_SSE2
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
|
||||
|
||||
@ -193,4 +193,31 @@ TEST_F(libyuvTest, TestAttenuate) {
|
||||
EXPECT_EQ(85, atten_pixels[255][2]);
|
||||
EXPECT_EQ(255, atten_pixels[255][3]);
|
||||
}
|
||||
|
||||
// Checks that the row functions returned by GetAddRow and GetSubRow add and
// remove a row of bytes from a 16 bit accumulator, then calls the add
// function repeatedly as a timing loop (no result is checked afterwards).
TEST_F(libyuvTest, TestAddRow) {
  SIMD_ALIGNED(uint8 orig_pixels[256]);
  SIMD_ALIGNED(uint16 added_pixels[256]);

  libyuv::AddRow add_row = GetAddRow(added_pixels, 256);
  libyuv::AddRow sub_row = GetSubRow(added_pixels, 256);

  // Source row is the ramp 0..255; the accumulator starts out cleared.
  for (int value = 0; value < 256; ++value) {
    orig_pixels[value] = value;
  }
  memset(added_pixels, 0, sizeof(added_pixels));

  // One add leaves a copy of the source row.
  add_row(orig_pixels, added_pixels, 256);
  EXPECT_EQ(7u, added_pixels[7]);
  EXPECT_EQ(250u, added_pixels[250]);

  // A second add doubles every element.
  add_row(orig_pixels, added_pixels, 256);
  EXPECT_EQ(14u, added_pixels[7]);
  EXPECT_EQ(500u, added_pixels[250]);

  // Subtracting the row once restores the single sum.
  sub_row(orig_pixels, added_pixels, 256);
  EXPECT_EQ(7u, added_pixels[7]);
  EXPECT_EQ(250u, added_pixels[250]);

  // Timing loop: 1000 passes over the row count of a 1280x720x4 byte image.
  const int kIterations = 1000 * (1280 * 720 * 4 / 256);
  for (int pass = 0; pass < kIterations; ++pass) {
    add_row(orig_pixels, added_pixels, 256);
  }
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user