mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
Sobel port to posix. Improved unittest for C version.
BUG=201 TESTED=try bots Review URL: https://webrtc-codereview.appspot.com/1242004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@619 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
735f9921e9
commit
9d48df9ac4
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 618
|
||||
Version: 619
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -118,6 +118,10 @@ extern "C" {
|
||||
#define HAS_ARGBUNATTENUATEROW_SSE2
|
||||
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
|
||||
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
||||
#define HAS_SOBELROW_SSE2
|
||||
#define HAS_SOBELXROW_SSSE3
|
||||
#define HAS_SOBELXYROW_SSE2
|
||||
#define HAS_SOBELYROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are Windows only.
|
||||
@ -125,10 +129,6 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_COPYROW_AVX2
|
||||
#define HAS_SOBELXROW_SSSE3
|
||||
#define HAS_SOBELYROW_SSSE3
|
||||
#define HAS_SOBELROW_SSE2
|
||||
#define HAS_SOBELXYROW_SSE2
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
// TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 618
|
||||
#define LIBYUV_VERSION 619
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1970,7 +1970,6 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
||||
SobelXRow = SobelXRow_SSSE3;
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) = SobelXYRow_C;
|
||||
#if defined(HAS_SOBELXYROW_SSE2)
|
||||
|
||||
@ -4067,6 +4067,217 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
|
||||
}
|
||||
#endif // HAS_ARGBSUBTRACTROW_SSE2
|
||||
|
||||
#ifdef HAS_SOBELXROW_SSSE3
|
||||
// SobelX as a matrix is
|
||||
// -1 0 1
|
||||
// -2 0 2
|
||||
// -1 0 1
|
||||
// Computes one row of horizontal Sobel magnitudes from three source rows.
// For every output index i the loop below evaluates
//   abs((y0[i] - y0[i+2]) + 2 * (y1[i] - y1[i+2]) + (y2[i] - y2[i+2]))
// and stores it as a byte saturated to 255. The differences are taken as
// [i] - [i+2], the opposite sign to the matrix above; the absolute value
// makes the result identical.
//   src_y0, src_y1, src_y2: the three input rows. Each 8-pixel group reads
//     bytes i .. i+9, so every row must be readable 2 bytes past the last
//     pixel processed.
//   dst_sobelx: output row, one byte per pixel.
//   width: pixel count. It is counted down by 8 per iteration and the loop
//     repeats while the remainder is positive, so a width that is not a
//     multiple of 8 is processed as if rounded up to the next multiple.
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
|
||||
const uint8* src_y2, uint8* dst_sobelx, int width) {
|
||||
asm volatile (
|
||||
// Turn src_y1, src_y2 and dst_sobelx into offsets from src_y0 so that only
// %0 has to be advanced inside the loop.
"sub %0,%1 \n"
|
||||
"sub %0,%2 \n"
|
||||
"sub %0,%3 \n"
|
||||
// xmm5 = 0, used to zero-extend bytes to 16-bit words.
"pxor %%xmm5,%%xmm5 \n"
|
||||
|
||||
// 8 pixel loop.
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
// Row 0: xmm0 = y0[i] - y0[i+2] as 16-bit words.
"movq (%0),%%xmm0 \n"
|
||||
"movq 0x2(%0),%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"psubw %%xmm1,%%xmm0 \n"
|
||||
// Row 1: xmm1 = y1[i] - y1[i+2].
"movq (%0,%1,1),%%xmm1 \n"
|
||||
"movq 0x2(%0,%1,1),%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
||||
"psubw %%xmm2,%%xmm1 \n"
|
||||
// Row 2: xmm2 = y2[i] - y2[i+2].
"movq (%0,%2,1),%%xmm2 \n"
|
||||
"movq 0x2(%0,%2,1),%%xmm3 \n"
|
||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm3 \n"
|
||||
"psubw %%xmm3,%%xmm2 \n"
|
||||
// Sum = row0 + row2 + 2 * row1 (the middle row is added twice).
"paddw %%xmm2,%%xmm0 \n"
|
||||
"paddw %%xmm1,%%xmm0 \n"
|
||||
"paddw %%xmm1,%%xmm0 \n"
|
||||
// Absolute value, then pack words to bytes with unsigned saturation.
"pabsw %%xmm0,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
// The flags set by this sub are consumed by the jg below; the movq and lea
// in between do not modify flags.
"sub $0x8,%4 \n"
|
||||
"movq %%xmm0,(%0,%3,1) \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_y0), // %0
|
||||
"+r"(src_y1), // %1
|
||||
"+r"(src_y2), // %2
|
||||
"+r"(dst_sobelx), // %3
|
||||
"+r"(width) // %4
|
||||
:
|
||||
: "memory", "cc"
|
||||
// The xmm clobbers are only declared when the compiler has SSE2 enabled.
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELXROW_SSSE3
|
||||
|
||||
#ifdef HAS_SOBELYROW_SSSE3
|
||||
// SobelY as a matrix is
|
||||
// -1 -2 -1
|
||||
// 0 0 0
|
||||
// 1 2 1
|
||||
// Computes one row of vertical Sobel magnitudes from two source rows.
// For every output index i the loop below evaluates
//   abs((y0[i] - y1[i]) + 2 * (y0[i+1] - y1[i+1]) + (y0[i+2] - y1[i+2]))
// and stores it as a byte saturated to 255. The differences are taken as
// y0 - y1, the opposite sign to the matrix above; the absolute value makes
// the result identical.
//   src_y0, src_y1: the two input rows (the matrix's zero middle row is not
//     read). Each 8-pixel group reads bytes i .. i+9, so both rows must be
//     readable 2 bytes past the last pixel processed.
//   dst_sobely: output row, one byte per pixel.
//   width: pixel count. It is counted down by 8 per iteration and the loop
//     repeats while the remainder is positive, so a width that is not a
//     multiple of 8 is processed as if rounded up to the next multiple.
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
|
||||
uint8* dst_sobely, int width) {
|
||||
asm volatile (
|
||||
// Turn src_y1 and dst_sobely into offsets from src_y0 so that only %0 has
// to be advanced inside the loop.
"sub %0,%1 \n"
|
||||
"sub %0,%2 \n"
|
||||
// xmm5 = 0, used to zero-extend bytes to 16-bit words.
"pxor %%xmm5,%%xmm5 \n"
|
||||
|
||||
// 8 pixel loop.
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
// Column offset 0: xmm0 = y0[i] - y1[i] as 16-bit words.
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%1,1),%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"psubw %%xmm1,%%xmm0 \n"
|
||||
// Column offset 1: xmm1 = y0[i+1] - y1[i+1].
"movq 0x1(%0),%%xmm1 \n"
|
||||
"movq 0x1(%0,%1,1),%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
||||
"psubw %%xmm2,%%xmm1 \n"
|
||||
// Column offset 2: xmm2 = y0[i+2] - y1[i+2].
"movq 0x2(%0),%%xmm2 \n"
|
||||
"movq 0x2(%0,%1,1),%%xmm3 \n"
|
||||
"punpcklbw %%xmm5,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm3 \n"
|
||||
"psubw %%xmm3,%%xmm2 \n"
|
||||
// Sum = col0 + col2 + 2 * col1 (the middle column is added twice).
"paddw %%xmm2,%%xmm0 \n"
|
||||
"paddw %%xmm1,%%xmm0 \n"
|
||||
"paddw %%xmm1,%%xmm0 \n"
|
||||
// Absolute value, then pack words to bytes with unsigned saturation.
"pabsw %%xmm0,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
// The flags set by this sub are consumed by the jg below; the movq and lea
// in between do not modify flags.
"sub $0x8,%3 \n"
|
||||
"movq %%xmm0,(%0,%2,1) \n"
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_y0), // %0
|
||||
"+r"(src_y1), // %1
|
||||
"+r"(dst_sobely), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
// The xmm clobbers are only declared when the compiler has SSE2 enabled.
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELYROW_SSSE3
|
||||
|
||||
#ifdef HAS_SOBELROW_SSE2
|
||||
// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
|
||||
// A = 255
|
||||
// R = Sobel
|
||||
// G = Sobel
|
||||
// B = Sobel
|
||||
// For each pixel, Sobel = saturating byte add of src_sobelx[i] and
// src_sobely[i]; that byte is replicated into the three low bytes of a
// 4-byte output pixel and the top byte is forced to 0xff.
//   src_sobelx, src_sobely: input rows, one byte per pixel. Loaded with
//     movdqa (the aligned 128-bit move), so both must be 16-byte aligned.
//   dst_argb: output row, 4 bytes per pixel. Stored with movdqa, so it must
//     be 16-byte aligned.
//   width: pixel count. It is counted down by 16 per iteration and the loop
//     repeats while the remainder is positive, so a width that is not a
//     multiple of 16 is processed as if rounded up to the next multiple.
void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
// Turn src_sobely into an offset from src_sobelx.
"sub %0,%1 \n"
|
||||
// xmm5 = 0xff000000 in every 32-bit lane: the alpha mask.
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"pslld $0x18,%%xmm5 \n"
|
||||
|
||||
// 16 pixel loop (16 input bytes in, 64 output bytes out per iteration).
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa (%0,%1,1),%%xmm1 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
// xmm0 = sobelx + sobely with unsigned byte saturation.
"paddusb %%xmm1,%%xmm0 \n"
|
||||
// Duplicate every byte: xmm2 = pixels 0-7 as byte pairs, xmm0 = pixels 8-15.
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"punpcklbw %%xmm0,%%xmm2 \n"
|
||||
"punpckhbw %%xmm0,%%xmm0 \n"
|
||||
// Duplicate every pair into a 4-byte pixel and OR in the alpha mask:
// xmm1 = pixels 0-3, xmm2 = pixels 4-7.
"movdqa %%xmm2,%%xmm1 \n"
|
||||
"punpcklwd %%xmm2,%%xmm1 \n"
|
||||
"punpckhwd %%xmm2,%%xmm2 \n"
|
||||
"por %%xmm5,%%xmm1 \n"
|
||||
"por %%xmm5,%%xmm2 \n"
|
||||
// Same for the upper half: xmm3 = pixels 8-11, xmm0 = pixels 12-15.
"movdqa %%xmm0,%%xmm3 \n"
|
||||
"punpcklwd %%xmm0,%%xmm3 \n"
|
||||
"punpckhwd %%xmm0,%%xmm0 \n"
|
||||
"por %%xmm5,%%xmm3 \n"
|
||||
"por %%xmm5,%%xmm0 \n"
|
||||
// The flags set by this sub are consumed by the jg below; the movdqa and
// lea in between do not modify flags.
"sub $0x10,%3 \n"
|
||||
"movdqa %%xmm1,(%2) \n"
|
||||
"movdqa %%xmm2,0x10(%2) \n"
|
||||
"movdqa %%xmm3,0x20(%2) \n"
|
||||
"movdqa %%xmm0,0x30(%2) \n"
|
||||
"lea 0x40(%2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
// The xmm clobbers are only declared when the compiler has SSE2 enabled.
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELROW_SSE2
|
||||
|
||||
#ifdef HAS_SOBELXYROW_SSE2
|
||||
// Mixes Sobel X, Sobel Y and Sobel into ARGB.
|
||||
// A = 255
|
||||
// R = Sobel X
|
||||
// G = Sobel
|
||||
// B = Sobel Y
|
||||
// Sobel is the saturating byte add of src_sobelx[i] and src_sobely[i]. Each
// 4-byte output pixel is written in memory order
//   sobely, sobel, sobelx, 0xff.
//   src_sobelx, src_sobely: input rows, one byte per pixel. Loaded with
//     movdqa (the aligned 128-bit move), so both must be 16-byte aligned.
//   dst_argb: output row, 4 bytes per pixel. Stored with movdqa, so it must
//     be 16-byte aligned.
//   width: pixel count. It is counted down by 16 per iteration and the loop
//     repeats while the remainder is positive, so a width that is not a
//     multiple of 16 is processed as if rounded up to the next multiple.
void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
// Turn src_sobely into an offset from src_sobelx.
"sub %0,%1 \n"
|
||||
// xmm5 = all bytes 0xff: supplies the alpha byte when interleaving.
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
|
||||
// 16 pixel loop (16 input bytes in, 64 output bytes out per iteration).
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa (%0,%1,1),%%xmm1 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
// xmm2 = sobelx + sobely with unsigned byte saturation.
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"paddusb %%xmm1,%%xmm2 \n"
|
||||
// Interleave sobelx with 0xff: xmm3 = (sobelx, 0xff) pairs for pixels 0-7,
// xmm0 = the same for pixels 8-15.
"movdqa %%xmm0,%%xmm3 \n"
|
||||
"punpcklbw %%xmm5,%%xmm3 \n"
|
||||
"punpckhbw %%xmm5,%%xmm0 \n"
|
||||
// Interleave sobely with sobel: xmm4 = (sobely, sobel) pairs for pixels
// 0-7, xmm1 = the same for pixels 8-15.
"movdqa %%xmm1,%%xmm4 \n"
|
||||
"punpcklbw %%xmm2,%%xmm4 \n"
|
||||
"punpckhbw %%xmm2,%%xmm1 \n"
|
||||
// Interleave the 16-bit pairs into 4-byte pixels:
// xmm6 = pixels 0-3, xmm4 = pixels 4-7.
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"punpcklwd %%xmm3,%%xmm6 \n"
|
||||
"punpckhwd %%xmm3,%%xmm4 \n"
|
||||
// xmm7 = pixels 8-11, xmm1 = pixels 12-15.
"movdqa %%xmm1,%%xmm7 \n"
|
||||
"punpcklwd %%xmm0,%%xmm7 \n"
|
||||
"punpckhwd %%xmm0,%%xmm1 \n"
|
||||
// The flags set by this sub are consumed by the jg below; the movdqa and
// lea in between do not modify flags.
"sub $0x10,%3 \n"
|
||||
"movdqa %%xmm6,(%2) \n"
|
||||
"movdqa %%xmm4,0x10(%2) \n"
|
||||
"movdqa %%xmm7,0x20(%2) \n"
|
||||
"movdqa %%xmm1,0x30(%2) \n"
|
||||
"lea 0x40(%2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_sobelx), // %0
|
||||
"+r"(src_sobely), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
// The xmm clobbers are only declared when the compiler has SSE2 enabled.
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_SOBELXYROW_SSE2
|
||||
|
||||
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
|
||||
// Creates a table of cumulative sums where each value is a sum of all values
|
||||
// above and to the left of the value, inclusive of the value.
|
||||
|
||||
@ -896,6 +896,7 @@ TEST_F(libyuvTest, TestSobelX) {
|
||||
SIMD_ALIGNED(uint8 orig_pixels_1[256 + 2]);
|
||||
SIMD_ALIGNED(uint8 orig_pixels_2[256 + 2]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_c[256]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256]);
|
||||
|
||||
for (int i = 0; i < 256 + 2; ++i) {
|
||||
orig_pixels_0[i] = i;
|
||||
@ -909,30 +910,29 @@ TEST_F(libyuvTest, TestSobelX) {
|
||||
EXPECT_EQ(16u, sobel_pixels_c[0]);
|
||||
EXPECT_EQ(16u, sobel_pixels_c[100]);
|
||||
EXPECT_EQ(255u, sobel_pixels_c[255]);
|
||||
|
||||
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
|
||||
const uint8* src_y2, uint8* dst_sobely, int width) =
|
||||
SobelXRow_C;
|
||||
#if defined(HAS_SOBELXROW_SSSE3)
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256]);
|
||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||
if (has_ssse3) {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelXRow_SSSE3(orig_pixels_0, orig_pixels_1, orig_pixels_2,
|
||||
sobel_pixels_opt, 256);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelXRow_C(orig_pixels_0, orig_pixels_1, orig_pixels_2,
|
||||
sobel_pixels_opt, 256);
|
||||
}
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
SobelXRow = SobelXRow_SSSE3;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelXRow(orig_pixels_0, orig_pixels_1, orig_pixels_2,
|
||||
sobel_pixels_opt, 256);
|
||||
}
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_F(libyuvTest, TestSobelY) {
|
||||
SIMD_ALIGNED(uint8 orig_pixels_0[256 + 2]);
|
||||
SIMD_ALIGNED(uint8 orig_pixels_1[256 + 2]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_c[256]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256]);
|
||||
|
||||
for (int i = 0; i < 256 + 2; ++i) {
|
||||
orig_pixels_0[i] = i;
|
||||
@ -944,28 +944,26 @@ TEST_F(libyuvTest, TestSobelY) {
|
||||
EXPECT_EQ(4u, sobel_pixels_c[0]);
|
||||
EXPECT_EQ(255u, sobel_pixels_c[100]);
|
||||
EXPECT_EQ(0u, sobel_pixels_c[255]);
|
||||
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
|
||||
uint8* dst_sobely, int width) = SobelYRow_C;
|
||||
#if defined(HAS_SOBELYROW_SSSE3)
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256]);
|
||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||
if (has_ssse3) {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelYRow_SSSE3(orig_pixels_0, orig_pixels_1, sobel_pixels_opt, 256);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelYRow_C(orig_pixels_0, orig_pixels_1, sobel_pixels_opt, 256);
|
||||
}
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
SobelYRow = SobelYRow_SSSE3;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelYRow(orig_pixels_0, orig_pixels_1, sobel_pixels_opt, 256);
|
||||
}
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_F(libyuvTest, TestSobel) {
|
||||
SIMD_ALIGNED(uint8 orig_sobelx[256]);
|
||||
SIMD_ALIGNED(uint8 orig_sobely[256]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_c[256 * 4]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256 * 4]);
|
||||
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
orig_sobelx[i] = i;
|
||||
@ -985,28 +983,26 @@ TEST_F(libyuvTest, TestSobel) {
|
||||
EXPECT_EQ(255u, sobel_pixels_c[7]);
|
||||
EXPECT_EQ(255u, sobel_pixels_c[100 * 4 + 1]);
|
||||
EXPECT_EQ(255u, sobel_pixels_c[255 * 4 + 1]);
|
||||
void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) = SobelRow_C;
|
||||
#if defined(HAS_SOBELROW_SSE2)
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256 * 4]);
|
||||
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
||||
if (has_sse2) {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelRow_SSE2(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelRow_C(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
|
||||
}
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
SobelRow = SobelRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelRow(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
|
||||
}
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_F(libyuvTest, TestSobelXY) {
|
||||
SIMD_ALIGNED(uint8 orig_sobelx[256]);
|
||||
SIMD_ALIGNED(uint8 orig_sobely[256]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_c[256 * 4]);
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256 * 4]);
|
||||
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
orig_sobelx[i] = i;
|
||||
@ -1022,22 +1018,19 @@ TEST_F(libyuvTest, TestSobelXY) {
|
||||
EXPECT_EQ(255u, sobel_pixels_c[7]);
|
||||
EXPECT_EQ(255u, sobel_pixels_c[100 * 4 + 1]);
|
||||
EXPECT_EQ(255u, sobel_pixels_c[255 * 4 + 1]);
|
||||
void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) = SobelXYRow_C;
|
||||
#if defined(HAS_SOBELXYROW_SSE2)
|
||||
SIMD_ALIGNED(uint8 sobel_pixels_opt[256 * 4]);
|
||||
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
||||
if (has_sse2) {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelXYRow_SSE2(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelXYRow_C(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
|
||||
}
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
SobelXYRow = SobelXYRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < benchmark_pixels_div256_; ++i) {
|
||||
SobelXYRow(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
|
||||
}
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_F(libyuvTest, TestCopyPlane) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user