diff --git a/README.chromium b/README.chromium
index f9943f523..4635fc904 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 608
+Version: 609
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index 73625b1cd..481b68455 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -347,6 +347,14 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
                 uint8* dst_argb, int dst_stride_argb,
                 const uint8* shuffler, int width, int height);
 
+// Sobel ARGB effect.
+// Writes a width x |height| ARGB image to dst_argb; a negative height inverts
+// the image.  Returns 0 on success, -1 on invalid arguments.
+LIBYUV_API
+int ARGBSobel(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height);
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 3de4f02bb..dcab7c69d 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 608
+#define LIBYUV_VERSION 609
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/libyuv_test.gyp b/libyuv_test.gyp
index a31d3a9bc..4429a1de0 100644
--- a/libyuv_test.gyp
+++ b/libyuv_test.gyp
@@ -71,7 +71,6 @@
         }],
       ], # conditions
     },
-
   ], # targets
 }
 
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index cdad49c9e..0fbdbfb0d 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -1822,6 +1822,177 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
   return 0;
 }
 
+// Sobel ARGB effect.
+// Each source row is converted by ARGBToYRow into a rotating window of 3 Y
+// rows whose left and right edge pixels are replicated.  For every output row
+// the SobelXRow and SobelYRow results are combined by ARGBAddRow and written
+// to dst_argb by YToARGBRow.  The first and last source rows are reused for
+// the window rows that fall outside the image.
+// A negative height inverts the image.
+// Returns 0 on success, or -1 if src_argb or dst_argb is NULL, width <= 0,
+// width > kMaxStride or height == 0.
+// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
+LIBYUV_API
+int ARGBSobel(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height) {
+  if (!src_argb || !dst_argb ||
+      width <= 0 || height == 0 || width > kMaxStride) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      // Assumed row buffer aligned.
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2_DISABLED) || \
+    defined(HAS_ARGBADDROW_AVX2_DISABLED)
+  // Set when an AVX2 row function is selected, so vzeroupper runs on exit.
+  bool clear = false;
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    clear = true;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+  void (*YToARGBRow)(const uint8* y_buf,
+                     uint8* rgb_buf,
+                     int width) = YToARGBRow_C;
+#if defined(HAS_YTOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    YToARGBRow = YToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      YToARGBRow = YToARGBRow_SSE2;
+    }
+  }
+#elif defined(HAS_YTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YToARGBRow = YToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      YToARGBRow = YToARGBRow_NEON;
+    }
+  }
+#endif
+
+  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) = SobelYRow_C;
+#if defined(HAS_SOBELYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    SobelYRow = SobelYRow_SSSE3;
+  }
+#endif
+  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width) =
+      SobelXRow_C;
+#if defined(HAS_SOBELXROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    SobelXRow = SobelXRow_SSSE3;
+  }
+#endif
+
+  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
+                     int width) = ARGBAddRow_C;
+#if defined(HAS_ARGBADDROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBAddRow = ARGBAddRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBADDROW_AVX2_DISABLED)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    clear = true;
+    ARGBAddRow = ARGBAddRow_AVX2;
+  }
+#endif
+#if defined(HAS_ARGBADDROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBAddRow = ARGBAddRow_NEON;
+  }
+#endif
+
+  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
+  SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]);
+  SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
+  SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
+  SIMD_ALIGNED(uint8 row_sobel[kMaxStride]);
+
+  // Rows are spaced kMaxStride + kEdge apart (matching the row_y size above)
+  // so the replicated edge pixels at [-1] and [width] stay inside their own
+  // row even when width == kMaxStride.
+  uint8* row_y0 = row_y + kEdge;
+  uint8* row_y1 = row_y0 + kMaxStride + kEdge;
+  uint8* row_y2 = row_y1 + kMaxStride + kEdge;
+  // Convert first row.  It is used for both row_y0 and row_y1.
+  ARGBToYRow(src_argb, row_y0, width);
+  row_y0[-1] = row_y0[0];
+  row_y0[width] = row_y0[width - 1];
+  ARGBToYRow(src_argb, row_y1, width);
+  row_y1[-1] = row_y1[0];
+  row_y1[width] = row_y1[width - 1];
+  // The Sobel rows hold 1 byte per pixel; ARGBAddRow is given their length in
+  // 4 byte units, rounded up.
+  int awidth = (width + 3) >> 2;
+
+  for (int y = 0; y < height; ++y) {
+    // Convert next row of ARGB to Y.
+    if (y < (height - 1)) {
+      src_argb += src_stride_argb;
+    }
+    ARGBToYRow(src_argb, row_y2, width);
+    row_y2[-1] = row_y2[0];
+    row_y2[width] = row_y2[width - 1];
+
+    SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
+    SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
+
+    ARGBAddRow(row_sobelx, row_sobely, row_sobel, awidth);
+
+    YToARGBRow(row_sobel, dst_argb, width);
+
+    // Cycle thru circular queue of 3 row_y buffers.
+    uint8* row_yt = row_y0;
+    row_y0 = row_y1;
+    row_y1 = row_y2;
+    row_y2 = row_yt;
+
+    dst_argb += dst_stride_argb;
+  }
+#if defined(HAS_ARGBTOYROW_AVX2_DISABLED) || \
+    defined(HAS_ARGBADDROW_AVX2_DISABLED)
+  if (clear) {
+    __asm vzeroupper;
+  }
+#endif
+  return 0;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 886b36dd1..76808ad50 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -1232,4 +1232,72 @@ TEST_F(libyuvTest, ARGBSubtract_Opt) {
   EXPECT_LE(max_diff, 1);
 }
 
+// Runs ARGBSobel on random input once after MaskCpuFlags(0) into dst_argb_c
+// and benchmark_iterations times after MaskCpuFlags(-1) into dst_argb_opt,
+// then returns the largest per byte difference between the two outputs.
+// invert (+1 or -1) is multiplied into the height passed to ARGBSobel; off is
+// a byte offset added to the source pointer.
+static int TestSobel(int width, int height, int benchmark_iterations,
+                     int invert, int off) {
+  const int kBpp = 4;
+  const int kStride = (width * kBpp + 15) & ~15;
+  align_buffer_64(src_argb_a, kStride * height + off);
+  align_buffer_64(dst_argb_c, kStride * height);
+  align_buffer_64(dst_argb_opt, kStride * height);
+  srandom(time(NULL));
+  for (int i = 0; i < kStride * height; ++i) {
+    src_argb_a[i + off] = (random() & 0xff);
+  }
+  memset(dst_argb_c, 0, kStride * height);
+  memset(dst_argb_opt, 0, kStride * height);
+
+  MaskCpuFlags(0);
+  ARGBSobel(src_argb_a + off, kStride,
+            dst_argb_c, kStride,
+            width, invert * height);
+  MaskCpuFlags(-1);
+  for (int i = 0; i < benchmark_iterations; ++i) {
+    ARGBSobel(src_argb_a + off, kStride,
+              dst_argb_opt, kStride,
+              width, invert * height);
+  }
+  int max_diff = 0;
+  for (int i = 0; i < kStride * height; ++i) {
+    int abs_diff =
+        abs(static_cast<int>(dst_argb_c[i]) -
+            static_cast<int>(dst_argb_opt[i]));
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+  free_aligned_buffer_64(src_argb_a)
+  free_aligned_buffer_64(dst_argb_c)
+  free_aligned_buffer_64(dst_argb_opt)
+  return max_diff;
+}
+
+TEST_F(libyuvTest, ARGBSobel_Any) {
+  int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_,
+                           benchmark_iterations_, +1, 0);
+  EXPECT_LE(max_diff, 14);
+}
+
+TEST_F(libyuvTest, ARGBSobel_Unaligned) {
+  int max_diff = TestSobel(benchmark_width_, benchmark_height_,
+                           benchmark_iterations_, +1, 1);
+  EXPECT_LE(max_diff, 14);
+}
+
+TEST_F(libyuvTest, ARGBSobel_Invert) {
+  int max_diff = TestSobel(benchmark_width_, benchmark_height_,
+                           benchmark_iterations_, -1, 0);
+  EXPECT_LE(max_diff, 14);
+}
+
+TEST_F(libyuvTest, ARGBSobel_Opt) {
+  int max_diff = TestSobel(benchmark_width_, benchmark_height_,
+                           benchmark_iterations_, +1, 0);
+  EXPECT_LE(max_diff, 14);
+}
+
 }  // namespace libyuv
diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc
index 57fda52cf..fac702621 100644
--- a/unit_test/unit_test.cc
+++ b/unit_test/unit_test.cc
@@ -39,8 +39,9 @@ libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
   if (height) {
     benchmark_height_ = atoi(height);  // NOLINT
   }
-  benchmark_pixels_div256_ = (benchmark_iterations_ * benchmark_width_ *
-      benchmark_height_ + 255) / 256;
+  benchmark_pixels_div256_ = static_cast<int>(
+      (static_cast<double>(benchmark_width_ *
+       benchmark_height_) * benchmark_iterations_ + 255.0) / 256.0);
 }
 
 int main(int argc, char** argv) {