diff --git a/README.chromium b/README.chromium
index 4635fc904..5800eef7c 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 609
+Version: 610
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index 481b68455..cb14678a8 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -353,6 +353,12 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);
 
+// Sobel ARGB effect with R = Sobel X, G = Sobel, B = Sobel Y and A = 255.
+LIBYUV_API
+int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height);
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 16df7084e..d7e34777d 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -122,6 +122,7 @@ extern "C" {
 #define HAS_COPYROW_AVX2
 #define HAS_SOBELXROW_SSSE3
 #define HAS_SOBELYROW_SSSE3
+#define HAS_SOBELXYROW_SSE2
 // Visual C 2012 required for AVX2.
 #if _MSC_VER >= 1700
 // TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
@@ -1430,6 +1431,10 @@ void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
                  uint8* dst_sobely, int width);
 void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
                      uint8* dst_sobely, int width);
+void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                  uint8* dst_argb, int width);
+void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index dcab7c69d..f44015509 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 609
+#define LIBYUV_VERSION 610
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 0fbdbfb0d..b1dc5fa0e 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -1975,6 +1975,134 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
   return 0;
 }
 
+
+// SobelXY ARGB effect.
+// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
+// Each source row is converted to Y, its first and last pixels are replicated
+// one pixel outward, and a sliding window of 3 Y rows feeds the Sobel rows.
+// The first and last source rows are used twice at the top and bottom edges.
+// Returns 0 on success.  Returns -1 if src_argb or dst_argb is NULL, height is
+// 0, or width is outside 1..kMaxStride.  Negative height inverts the image.
+// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
+LIBYUV_API
+int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height) {
+  if (!src_argb || !dst_argb ||
+      width <= 0 || height == 0 || width > kMaxStride) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      // Assumed row buffer aligned.
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
+  bool clear = false;
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    clear = true;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) = SobelYRow_C;
+#if defined(HAS_SOBELYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    SobelYRow = SobelYRow_SSSE3;
+  }
+#endif
+  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width) =
+      SobelXRow_C;
+#if defined(HAS_SOBELXROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    SobelXRow = SobelXRow_SSSE3;
+  }
+#endif
+
+  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) = SobelXYRow_C;
+#if defined(HAS_SOBELXYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    SobelXYRow = SobelXYRow_SSE2;
+  }
+#endif
+
+  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
+  // Each Y row owns kMaxStride + kEdge bytes so the edge pixels written at
+  // [-1] and [width] never land inside a neighboring row.
+  const int kRowSize = kMaxStride + kEdge;
+  SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]);
+  SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
+  SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
+
+  // Convert first row.
+  uint8* row_y0 = row_y + kEdge;
+  uint8* row_y1 = row_y0 + kRowSize;
+  uint8* row_y2 = row_y1 + kRowSize;
+  ARGBToYRow(src_argb, row_y0, width);
+  row_y0[-1] = row_y0[0];
+  row_y0[width] = row_y0[width - 1];
+  ARGBToYRow(src_argb, row_y1, width);
+  row_y1[-1] = row_y1[0];
+  row_y1[width] = row_y1[width - 1];
+
+  for (int y = 0; y < height; ++y) {
+    // Convert next row of ARGB to Y.
+    if (y < (height - 1)) {
+      src_argb += src_stride_argb;
+    }
+    ARGBToYRow(src_argb, row_y2, width);
+    row_y2[-1] = row_y2[0];
+    row_y2[width] = row_y2[width - 1];
+
+    SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
+    SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
+    SobelXYRow(row_sobelx, row_sobely, dst_argb, width);
+
+    // Cycle thru circular queue of 3 row_y buffers.
+    uint8* row_yt = row_y0;
+    row_y0 = row_y1;
+    row_y1 = row_y2;
+    row_y2 = row_yt;
+
+    dst_argb += dst_stride_argb;
+  }
+#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
+  if (clear) {
+    __asm vzeroupper;
+  }
+#endif
+  return 0;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/source/row_common.cc b/source/row_common.cc
index 2ba95214e..42570299d 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -759,6 +759,26 @@ void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
   }
 }
 
+// Packs a row of Sobel X and Sobel Y values into ARGB pixels.
+// B = Sobel Y, G = Sobel X + Sobel Y clamped to 255, R = Sobel X, A = 255.
+// Writes 4 bytes to dst_argb for each of the width input values.
+void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                  uint8* dst_argb, int width) {
+  for (int i = 0; i < width; ++i) {
+    int r = src_sobelx[i];
+    int b = src_sobely[i];
+    int g = r + b;
+    if (g > 255) {
+      g = 255;
+    }
+    dst_argb[0] = static_cast<uint8>(b);
+    dst_argb[1] = static_cast<uint8>(g);
+    dst_argb[2] = static_cast<uint8>(r);
+    dst_argb[3] = static_cast<uint8>(255u);
+    dst_argb += 4;
+  }
+}
+
 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
   // Copy a Y to RGB.
   for (int x = 0; x < width; ++x) {
diff --git a/source/row_win.cc b/source/row_win.cc
index 57a69bae8..b8b5d3852 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -5133,6 +5133,59 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
   }
 }
 #endif  // HAS_SOBELYROW_SSSE3
+#ifdef HAS_SOBELXYROW_SSE2
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+// Handles 16 pixels per loop iteration and uses aligned (movdqa) loads and
+// stores for src_sobelx, src_sobely and dst_argb.
+__declspec(naked) __declspec(align(16))
+void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_sobelx
+    mov        esi, [esp + 4 + 8]   // src_sobely
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    sub        esi, eax             // esi = src_sobely - src_sobelx
+    pcmpeqb    xmm5, xmm5           // alpha 255
+
+    align      16
+  convertloop:
+    movdqa     xmm0, [eax]          // read 16 pixels src_sobelx
+    movdqa     xmm1, [eax + esi]    // read 16 pixels src_sobely
+    lea        eax, [eax + 16]
+    movdqa     xmm2, xmm0
+    paddusb    xmm2, xmm1           // sobel = sobelx + sobely (saturating)
+    movdqa     xmm3, xmm0           // XA: Sobel X bytes paired with alpha
+    punpcklbw  xmm3, xmm5
+    punpckhbw  xmm0, xmm5
+    movdqa     xmm4, xmm1           // YS: Sobel Y bytes paired with Sobel
+    punpcklbw  xmm4, xmm2
+    punpckhbw  xmm1, xmm2
+    movdqa     xmm6, xmm4           // YSXA
+    punpcklwd  xmm6, xmm3           // First 4
+    punpckhwd  xmm4, xmm3           // Next 4
+    movdqa     xmm7, xmm1           // YSXA
+    punpcklwd  xmm7, xmm0           // Next 4
+    punpckhwd  xmm1, xmm0           // Last 4
+    sub        ecx, 16
+    movdqa     [edx], xmm6
+    movdqa     [edx + 16], xmm4
+    movdqa     [edx + 32], xmm7
+    movdqa     [edx + 48], xmm1
+    lea        edx, [edx + 64]
+    jg         convertloop          // loop while pixels remain
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELXYROW_SSE2
+
 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
 // Consider float CumulativeSum.
 // Consider calling CumulativeSum one row at time as needed.
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 76808ad50..302d21626 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -962,6 +962,43 @@ TEST_F(libyuvTest, TestSobelY) {
 #endif
 }
 
+TEST_F(libyuvTest, TestSobelXY) {
+  SIMD_ALIGNED(uint8 orig_sobelx[256]);
+  SIMD_ALIGNED(uint8 orig_sobely[256]);
+  SIMD_ALIGNED(uint8 sobel_pixels_c[256 * 4]);
+
+  for (int i = 0; i < 256; ++i) {
+    orig_sobelx[i] = i;
+    orig_sobely[i] = i * 2;
+  }
+
+  SobelXYRow_C(orig_sobelx, orig_sobely, sobel_pixels_c, 256);
+
+  EXPECT_EQ(0u, sobel_pixels_c[0]);
+  EXPECT_EQ(2u, sobel_pixels_c[4]);  // B = Sobel Y of pixel 1.
+  EXPECT_EQ(3u, sobel_pixels_c[5]);  // G = Sobel X + Sobel Y.
+  EXPECT_EQ(1u, sobel_pixels_c[6]);  // R = Sobel X.
+  EXPECT_EQ(255u, sobel_pixels_c[7]);  // A.
+  EXPECT_EQ(255u, sobel_pixels_c[100 * 4 + 1]);  // 100 + 200 clamps to 255.
+  EXPECT_EQ(255u, sobel_pixels_c[255 * 4 + 1]);
+#if defined(HAS_SOBELXYROW_SSE2)
+  SIMD_ALIGNED(uint8 sobel_pixels_opt[256 * 4]);
+  int has_sse2 = TestCpuFlag(kCpuHasSSE2);
+  if (has_sse2) {
+    for (int i = 0; i < benchmark_pixels_div256_; ++i) {
+      SobelXYRow_SSE2(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
+    }
+  } else {
+    for (int i = 0; i < benchmark_pixels_div256_; ++i) {
+      SobelXYRow_C(orig_sobelx, orig_sobely, sobel_pixels_opt, 256);
+    }
+  }
+  for (int i = 0; i < 256 * 4; ++i) {  // Compare every output byte.
+    EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
+  }
+#endif
+}
+
 TEST_F(libyuvTest, TestCopyPlane) {
   int err = 0;
   int yw = benchmark_width_;
@@ -1295,4 +1332,70 @@ TEST_F(libyuvTest, ARGBSobel_Opt) {
   EXPECT_LE(max_diff, 14);
 }
 
+// Runs ARGBSobelXY on random pixels with CPU optimizations masked off and then
+// enabled, and returns the largest per byte difference between the outputs.
+// invert (+1 or -1) is multiplied into height; off offsets the source pointer.
+static int TestSobelXY(int width, int height, int benchmark_iterations,
+                       int invert, int off) {
+  const int kBpp = 4;
+  const int kStride = (width * kBpp + 15) & ~15;
+  align_buffer_64(src_argb_a, kStride * height + off);
+  align_buffer_64(dst_argb_c, kStride * height);
+  align_buffer_64(dst_argb_opt, kStride * height);
+  srandom(time(NULL));
+  for (int i = 0; i < kStride * height; ++i) {
+    src_argb_a[i + off] = (random() & 0xff);
+  }
+  memset(dst_argb_c, 0, kStride * height);
+  memset(dst_argb_opt, 0, kStride * height);
+
+  MaskCpuFlags(0);
+  ARGBSobelXY(src_argb_a + off, kStride,
+              dst_argb_c, kStride,
+              width, invert * height);
+  MaskCpuFlags(-1);
+  for (int i = 0; i < benchmark_iterations; ++i) {
+    ARGBSobelXY(src_argb_a + off, kStride,
+                dst_argb_opt, kStride,
+                width, invert * height);
+  }
+  int max_diff = 0;
+  for (int i = 0; i < kStride * height; ++i) {
+    int abs_diff =
+        abs(static_cast<int>(dst_argb_c[i]) -
+            static_cast<int>(dst_argb_opt[i]));
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+  free_aligned_buffer_64(src_argb_a)
+  free_aligned_buffer_64(dst_argb_c)
+  free_aligned_buffer_64(dst_argb_opt)
+  return max_diff;
+}
+
+TEST_F(libyuvTest, ARGBSobelXY_Any) {
+  int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
+                             benchmark_iterations_, +1, 0);
+  EXPECT_LE(max_diff, 14);
+}
+
+TEST_F(libyuvTest, ARGBSobelXY_Unaligned) {
+  int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
+                             benchmark_iterations_, +1, 1);
+  EXPECT_LE(max_diff, 14);
+}
+
+TEST_F(libyuvTest, ARGBSobelXY_Invert) {
+  int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
+                             benchmark_iterations_, -1, 0);
+  EXPECT_LE(max_diff, 14);
+}
+
+TEST_F(libyuvTest, ARGBSobelXY_Opt) {
+  int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
+                             benchmark_iterations_, +1, 0);
+  EXPECT_LE(max_diff, 14);
+}
+
 }  // namespace libyuv