From c93a137671e2e281dfb8d32561fed95caacf608b Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Tue, 19 Mar 2013 23:51:09 +0000 Subject: [PATCH] Replace Add+YToARGB with SobelRow BUG=201 TEST=Sobel* Review URL: https://webrtc-codereview.appspot.com/1217005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@611 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 5 ++++ include/libyuv/version.h | 2 +- source/planar_functions.cc | 53 ++++++------------------------------- source/row_common.cc | 17 ++++++++++++ source/row_win.cc | 54 +++++++++++++++++++++++++++++++++++++- unit_test/planar_test.cc | 41 +++++++++++++++++++++++++++++ 7 files changed, 126 insertions(+), 48 deletions(-) diff --git a/README.chromium b/README.chromium index 5800eef7c..e3e82bd9f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 610 +Version: 611 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index d7e34777d..ee6c1709d 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -122,6 +122,7 @@ extern "C" { #define HAS_COPYROW_AVX2 #define HAS_SOBELXROW_SSSE3 #define HAS_SOBELYROW_SSSE3 +#define HAS_SOBELROW_SSE2 #define HAS_SOBELXYROW_SSE2 // Visual C 2012 required for AVX2. 
#if _MSC_VER >= 1700 @@ -1431,6 +1432,10 @@ void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width); void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width); +void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, + uint8* dst_argb, int width); +void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, + uint8* dst_argb, int width); void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_argb, int width); void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index f44015509..9c12b157a 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 610 +#define LIBYUV_VERSION 611 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b1dc5fa0e..1cc7d6d2a 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1870,25 +1870,6 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, } } #endif - void (*YToARGBRow)(const uint8* y_buf, - uint8* rgb_buf, - int width) = YToARGBRow_C; -#if defined(HAS_YTOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - YToARGBRow = YToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - YToARGBRow = YToARGBRow_SSE2; - } - } -#elif defined(HAS_YTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YToARGBRow = YToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - YToARGBRow = YToARGBRow_NEON; - } - } -#endif void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width) = SobelYRow_C; @@ -1905,23 +1886,12 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, SobelXRow = SobelXRow_SSSE3; } #endif - - void 
(*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst, - int width) = ARGBAddRow_C; -#if defined(HAS_ARGBADDROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBAddRow = ARGBAddRow_SSE2; - } -#endif -#if defined(HAS_ARGBADDROW_AVX2_DISABLED) - if (TestCpuFlag(kCpuHasAVX2)) { - clear = true; - ARGBAddRow = ARGBAddRow_AVX2; - } -#endif -#if defined(HAS_ARGBADDROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBAddRow = ARGBAddRow_NEON; + void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, + uint8* dst_argb, int width) = SobelRow_C; +#if defined(HAS_SOBELROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + SobelRow = SobelRow_SSE2; } #endif @@ -1929,7 +1899,6 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]); SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]); SIMD_ALIGNED(uint8 row_sobely[kMaxStride]); - SIMD_ALIGNED(uint8 row_sobel[kMaxStride]); // Convert first row. uint8* row_y0 = row_y + kEdge; @@ -1941,7 +1910,6 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, ARGBToYRow(src_argb, row_y1, width); row_y1[-1] = row_y1[0]; row_y1[width] = row_y1[width - 1]; - int awidth = (width + 3) >> 2; for (int y = 0; y < height; ++y) { // Convert next row of ARGB to Y. @@ -1954,10 +1922,7 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); - - ARGBAddRow(row_sobelx, row_sobely, row_sobel, awidth); - - YToARGBRow(row_sobel, dst_argb, width); + SobelRow(row_sobelx, row_sobely, dst_argb, width); // Cycle thru circular queue of 3 row_y buffers. uint8* row_yt = row_y0; @@ -1975,7 +1940,6 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, return 0; } - // SobelXY ARGB effect. // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. 
// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper. @@ -2042,7 +2006,7 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, #endif void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelXYRow_C; + uint8* dst_argb, int width) = SobelXYRow_C; #if defined(HAS_SOBELXYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { @@ -2065,7 +2029,6 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, ARGBToYRow(src_argb, row_y1, width); row_y1[-1] = row_y1[0]; row_y1[width] = row_y1[width - 1]; - int awidth = (width + 3) >> 2; for (int y = 0; y < height; ++y) { // Convert next row of ARGB to Y. diff --git a/source/row_common.cc b/source/row_common.cc index 42570299d..ebbcd435a 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -759,6 +759,23 @@ void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, } } +void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, + uint8* dst_argb, int width) { + for (int i = 0; i < width; ++i) { + int r = src_sobelx[i]; + int b = src_sobely[i]; + int s = r + b; + if (s > 255) { // Saturate the sum to 8 bits. + s = 255; + } + dst_argb[0] = static_cast<uint8>(s); + dst_argb[1] = static_cast<uint8>(s); + dst_argb[2] = static_cast<uint8>(s); + dst_argb[3] = static_cast<uint8>(255u); // A = 255. + dst_argb += 4; + } +} + void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_argb, int width) { for (int i = 0; i < width; ++i) { diff --git a/source/row_win.cc b/source/row_win.cc index b8b5d3852..a46396940 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -5133,6 +5133,58 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, } #endif // HAS_SOBELYROW_SSSE3 +#ifdef HAS_SOBELROW_SSE2 +// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255 +// R = Sobel +// G = Sobel +// B = Sobel +__declspec(naked) __declspec(align(16)) +void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, + uint8* dst_argb, int width) { + __asm { + push esi + mov eax, [esp + 4 + 4] // src_sobelx + mov esi, [esp + 4 + 8] // src_sobely + mov edx, [esp + 4 + 12] // dst_argb + mov ecx, [esp + 4 + 16] // width + sub esi, eax // esi = src_sobely - src_sobelx, so [eax + esi] is src_sobely + pcmpeqb xmm5, xmm5 // alpha 255 + pslld xmm5, 24 // 0xff000000 + + align 16 + convertloop: + movdqa xmm0, [eax] // read 16 pixels src_sobelx (movdqa: 16 byte aligned) + movdqa xmm1, [eax + esi] // read 16 pixels src_sobely (movdqa: 16 byte aligned) + lea eax, [eax + 16] + paddusb xmm0, xmm1 // sobel = sobelx + sobely, saturated to 255 + movdqa xmm2, xmm0 // GG + punpcklbw xmm2, xmm0 // First 8 + punpckhbw xmm0, xmm0 // Next 8 + movdqa xmm1, xmm2 // GGGG + punpcklwd xmm1, xmm2 // First 4 + punpckhwd xmm2, xmm2 // Next 4 + por xmm1, xmm5 // GGGA + por xmm2, xmm5 + movdqa xmm3, xmm0 // GGGG + punpcklwd xmm3, xmm0 // Next 4 + punpckhwd xmm0, xmm0 // Last 4 + por xmm3, xmm5 // GGGA + por xmm0, xmm5 + sub ecx, 16 // 16 pixels per pass; flags feed jg below (movdqa/lea keep them) + movdqa [edx], xmm1 + movdqa [edx + 16], xmm2 + movdqa [edx + 32], xmm3 + movdqa [edx + 48], xmm0 + lea edx, [edx + 64] // 16 ARGB pixels = 64 bytes written + jg convertloop + + pop esi + ret + } +} +#endif // HAS_SOBELROW_SSE2 + #ifdef HAS_SOBELXYROW_SSE2 // Mixes Sobel X, Sobel Y and Sobel into ARGB. // A = 255 @@ -5182,7 +5234,7 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, ret } } -#endif // HAS_SOBELXYROW_SSE2 +#endif // HAS_SOBELXYROW_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 // Consider float CumulativeSum.
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 302d21626..1026cdefc 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -962,6 +962,47 @@ TEST_F(libyuvTest, TestSobelY) { #endif } +TEST_F(libyuvTest, TestSobel) { + SIMD_ALIGNED(uint8 orig_sobelx[256]); + SIMD_ALIGNED(uint8 orig_sobely[256]); + SIMD_ALIGNED(uint8 sobel_pixels_c[256 * 4]); + + for (int i = 0; i < 256; ++i) { + orig_sobelx[i] = i; + orig_sobely[i] = i * 2; + } + + SobelRow_C(orig_sobelx, orig_sobely, sobel_pixels_c, 256); + + EXPECT_EQ(0u, sobel_pixels_c[0]); + EXPECT_EQ(3u, sobel_pixels_c[4]); + EXPECT_EQ(3u, sobel_pixels_c[5]); + EXPECT_EQ(3u, sobel_pixels_c[6]); + EXPECT_EQ(255u, sobel_pixels_c[7]); + EXPECT_EQ(6u, sobel_pixels_c[8]); + EXPECT_EQ(6u, sobel_pixels_c[9]); + EXPECT_EQ(6u, sobel_pixels_c[10]); + EXPECT_EQ(255u, sobel_pixels_c[11]); + EXPECT_EQ(255u, sobel_pixels_c[100 * 4 + 1]); + EXPECT_EQ(255u, sobel_pixels_c[255 * 4 + 1]); +#if defined(HAS_SOBELROW_SSE2) + SIMD_ALIGNED(uint8 sobel_pixels_opt[256 * 4]); + int has_sse2 = TestCpuFlag(kCpuHasSSE2); + if (has_sse2) { + for (int i = 0; i < benchmark_pixels_div256_; ++i) { + SobelRow_SSE2(orig_sobelx, orig_sobely, sobel_pixels_opt, 256); + } + } else { + for (int i = 0; i < benchmark_pixels_div256_; ++i) { + SobelRow_C(orig_sobelx, orig_sobely, sobel_pixels_opt, 256); + } + } + for (int i = 0; i < 256 * 4; ++i) { // Compare every output byte. + EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]); + } +#endif +} + TEST_F(libyuvTest, TestSobelXY) { SIMD_ALIGNED(uint8 orig_sobelx[256]); SIMD_ALIGNED(uint8 orig_sobely[256]);