From 7f67961ec53f0ad12f827905fc4a4cc880f00931 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Sat, 12 Oct 2013 22:27:37 +0000 Subject: [PATCH] ARGBCopyAlpha for effects BUG=none TEST=none R=johannkoenig@google.com Review URL: https://webrtc-codereview.appspot.com/2385004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@810 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/planar_functions.h | 18 +++++---------- include/libyuv/row.h | 5 ++++ include/libyuv/version.h | 2 +- source/planar_functions.cc | 38 +++++++++++++++++++++++++++++++ source/row_common.cc | 12 ++++++++++ source/row_win.cc | 31 +++++++++++++++++++++++++ unit_test/planar_test.cc | 29 +++++++++++++++++++++++ 8 files changed, 123 insertions(+), 14 deletions(-) diff --git a/README.chromium b/README.chromium index 2755c0e18..e17f6aaf6 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 809 +Version: 810 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index a50a35502..e30c99ced 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -209,18 +209,6 @@ int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, const int8* matrix_rgb, int x, int y, int width, int height); -#ifdef __cplusplus -} // extern "C" -// Deprecated. Temporary API mapper. -int inline ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int x, int y, int width, int height) { - return RGBColorMatrix(dst_argb, dst_stride_argb, matrix_rgb, - x, y, width, height); -} -extern "C" { -#endif - // Apply a color table each ARGB pixel. // Table contains 256 ARGB values. LIBYUV_API @@ -273,6 +261,12 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Copy alpha channel of ARGB to alpha of ARGB. 
+LIBYUV_API +int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width); diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 7ddd501be..005e499e5 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -167,6 +167,8 @@ extern "C" { // Effects: // TODO(fbarchard): Optimize and enable // #define HAS_ARGBLUMACOLORTABLEROW_SSSE3 +// TODO(fbarchard): Optimize and enable +// #define HAS_ARGBCOPYALPHAROW_SSE2 // Caveat: Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 @@ -697,6 +699,9 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count); void CopyRow_MIPS(const uint8* src, uint8* dst, int count); void CopyRow_C(const uint8* src, uint8* dst, int count); +void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width); +void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width); + void SetRow_X86(uint8* dst, uint32 v32, int count); void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, int dst_stride, int height); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 757a7c5eb..62eae0d35 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 809 +#define LIBYUV_VERSION 810 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b8b8be425..d151c2a5c 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2162,6 +2162,44 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, return 0; } +// Copy alpha channel of ARGB to alpha of ARGB with optional flipping +LIBYUV_API +int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // TODO(fbarchard): Consider macro for boiler plate 
checks, invert and/or + // row coalesce. + if (!src_argb || !dst_argb || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce contiguous rows. + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { + return ARGBCopyAlpha(src_argb, 0, + dst_argb, 0, + width * height, 1); + } + void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = + ARGBCopyAlphaRow_C; +#if defined(HAS_ARGBCOPYALPHAROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) { // NOTE(review): SSE2 row loads src with movdqa; pointer/stride alignment is not checked here - confirm before enabling + ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; + } +#endif + for (int y = 0; y < height; ++y) { + ARGBCopyAlphaRow(src_argb, dst_argb, width); + src_argb += src_stride_argb; + dst_argb += dst_stride_argb; + } + return 0; +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_common.cc b/source/row_common.cc index f412336e5..5c6817a13 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2099,6 +2099,18 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb, } } +void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { + for (int i = 0; i < width - 1; i += 2) { + dst[3] = src[3]; // alpha is byte 3 of each 4-byte pixel; 2 pixels per pass + dst[7] = src[7]; + dst += 8; + src += 8; + } + if (width & 1) { + dst[3] = src[3]; + } +} + #undef clamp0 #undef clamp255 diff --git a/source/row_win.cc b/source/row_win.cc index 4a554a828..1632732ea 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3603,6 +3603,37 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) { } #endif // HAS_COPYROW_X86 + +#ifdef HAS_ARGBCOPYALPHAROW_SSE2 +// width in pixels +__declspec(naked) __declspec(align(16)) +void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { + __asm { + mov edx, edi // save edi; restored before ret + mov eax, [esp + 4] // src + mov edi, [esp + 8] // dst + mov ecx, [esp + 12] // width + pcmpeqb xmm5, xmm5 // generate 
mask 0xff000000 + pslld xmm5, 24 + + align 16 + convertloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + lea eax, [eax + 32] + maskmovdqu xmm0, xmm5 // store only the masked (alpha) bytes to [edi] + lea edi, [edi + 16] + maskmovdqu xmm1, xmm5 + lea edi, [edi + 16] + sub ecx, 8 // 8 pixels (32 bytes) per iteration + jg convertloop + + mov edi, edx + ret + } +} +#endif // HAS_ARGBCOPYALPHAROW_SSE2 + #ifdef HAS_SETROW_X86 // SetRow8 writes 'count' bytes using a 32 bit value repeated. __declspec(naked) __declspec(align(16)) diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 009690e07..9c646909a 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1906,5 +1906,34 @@ TEST_F(libyuvTest, TestARGBLumaColorTable) { } } +TEST_F(libyuvTest, TestARGBCopyAlpha) { + const int kSize = benchmark_width_ * benchmark_height_ * 4; + align_buffer_64(orig_pixels, kSize); + align_buffer_64(dst_pixels_opt, kSize); + align_buffer_64(dst_pixels_c, kSize); + + MemRandomize(orig_pixels, kSize); + MemRandomize(dst_pixels_opt, kSize); + memcpy(dst_pixels_c, dst_pixels_opt, kSize); + + MaskCpuFlags(0); + ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, + dst_pixels_c, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + MaskCpuFlags(-1); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, + dst_pixels_opt, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kSize; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_64(dst_pixels_c) + free_aligned_buffer_64(dst_pixels_opt) + free_aligned_buffer_64(orig_pixels) +} } // namespace libyuv