From adef267edfb3539cd773692d6fa4050ffd092f55 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 17 Oct 2013 07:32:16 +0000 Subject: [PATCH] CopyYToAlpha to copy from a plane to alpha channel of ARGB BUG=275 TESTED=untested R=ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2415004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@814 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- include/libyuv/planar_functions.h | 6 +++ include/libyuv/row.h | 12 +++-- source/planar_functions.cc | 54 ++++++++++++++++--- source/row_common.cc | 13 ++++- source/row_win.cc | 88 +++++++++++++++++++++++++++---- unit_test/planar_test.cc | 30 +++++++++++ 6 files changed, 182 insertions(+), 21 deletions(-) diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index e30c99ced..7acf19b4a 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -267,6 +267,12 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Copy Y channel to Alpha of ARGB. +LIBYUV_API +int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width); diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 69a7076c0..d2235fc43 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -168,6 +168,7 @@ extern "C" { // TODO(fbarchard): Optimize and enable // #define HAS_ARGBLUMACOLORTABLEROW_SSSE3 #define HAS_ARGBCOPYALPHAROW_SSE2 +#define HAS_ARGBCOPYYTOALPHAROW_SSE2 // Caveat: Visual C 2012 required for AVX2. 
#if _MSC_VER >= 1700 @@ -187,6 +188,7 @@ extern "C" { #define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOYROW_AVX2 #define HAS_ARGBCOPYALPHAROW_AVX2 +#define HAS_ARGBCOPYYTOALPHAROW_AVX2 // Effects: #define HAS_ARGBADDROW_AVX2 @@ -699,9 +701,13 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count); void CopyRow_MIPS(const uint8* src, uint8* dst, int count); void CopyRow_C(const uint8* src, uint8* dst, int count); -void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width); -void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width); -void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width); +void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); + +void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); +void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); +void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); void SetRow_X86(uint8* dst, uint32 v32, int count); void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 243a87f59..5daa5c627 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2162,15 +2162,14 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, return 0; } -// Copy ARGB with optional flipping +// Copy Alpha from one ARGB image to another. LIBYUV_API int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height) { // TODO(fbarchard): Consider macro for boiler plate checks, invert and/or // row coalesce. - if (!src_argb || !dst_argb || - width <= 0 || height == 0) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -2181,9 +2180,9 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, } // Coalesce contiguous rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { - return ARGBCopyAlpha(src_argb, 0, - dst_argb, 0, - width * height, 1); + width *= height; + height = 1; + src_stride_argb = dst_stride_argb = 0; } void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = ARGBCopyAlphaRow_C; @@ -2208,6 +2207,49 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, return 0; } +// Copy a planar Y channel to the alpha channel of a destination ARGB image. +LIBYUV_API +int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (!src_y || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_stride_y = -src_stride_y; + } + // Coalesce contiguous rows. 
+ if (src_stride_y == width && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_y = dst_stride_argb = 0; + } + void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = + ARGBCopyYToAlphaRow_C; +#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && + IS_ALIGNED(width, 8)) { + ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; + } +#endif +#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { + ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; + } +#endif + for (int y = 0; y < height; ++y) { + ARGBCopyYToAlphaRow(src_y, dst_argb, width); + src_y += src_stride_y; + dst_argb += dst_stride_argb; + } + return 0; +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_common.cc b/source/row_common.cc index 5c6817a13..e5e5f8a2c 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2111,8 +2111,17 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { } } -#undef clamp0 -#undef clamp255 +void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { + for (int i = 0; i < width - 1; i += 2) { + dst[3] = src[0]; + dst[7] = src[1]; + dst += 8; + src += 2; + } + if (width & 1) { + dst[3] = src[0]; + } +} #ifdef __cplusplus } // extern "C" diff --git a/source/row_win.cc b/source/row_win.cc index dd2152f34..20636ffcd 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -3649,20 +3649,17 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { mov edx, [esp + 8] // dst mov ecx, [esp + 12] // count vpcmpeqb ymm0, ymm0, ymm0 - vpsrld ymm1, ymm0, 8 // generate mask 0x00ffffff - vpslld ymm0, ymm0, 24 // generate mask 0xff000000 + vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff align 4 convertloop: - vpand ymm2, ymm0, [eax] - vpand ymm3, ymm0, [eax + 32] + vmovdqu 
ymm1, [eax] + vmovdqu ymm2, [eax + 32] lea eax, [eax + 64] - vpand ymm4, ymm1, [edx] - vpand ymm5, ymm1, [edx + 32] - vpor ymm2, ymm2, ymm4 - vpor ymm3, ymm3, ymm5 - vmovdqu [edx], ymm2 - vmovdqu [edx + 32], ymm3 + vpblendvb ymm1, ymm1, [edx], ymm0 + vpblendvb ymm2, ymm2, [edx + 32], ymm0 + vmovdqu [edx], ymm1 + vmovdqu [edx + 32], ymm2 lea edx, [edx + 64] sub ecx, 16 jg convertloop @@ -3673,6 +3670,77 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { } #endif // HAS_ARGBCOPYALPHAROW_AVX2 +#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 +// width in pixels +__declspec(naked) __declspec(align(16)) +void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { + __asm { + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // count + pcmpeqb xmm0, xmm0 // generate mask 0xff000000 + pslld xmm0, 24 + pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff + psrld xmm1, 8 + + align 4 + convertloop: + movq xmm2, qword ptr [eax] // 8 Y's + lea eax, [eax + 8] + punpcklbw xmm2, xmm2 + punpckhwd xmm3, xmm2 + punpcklwd xmm2, xmm2 + movdqa xmm4, [edx] + movdqa xmm5, [edx + 16] + pand xmm2, xmm0 + pand xmm3, xmm0 + pand xmm4, xmm1 + pand xmm5, xmm1 + por xmm2, xmm4 + por xmm3, xmm5 + movdqa [edx], xmm2 + movdqa [edx + 16], xmm3 + lea edx, [edx + 32] + sub ecx, 8 + jg convertloop + + ret + } +} +#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 + +#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 +// width in pixels +__declspec(naked) __declspec(align(16)) +void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { + __asm { + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // count + vpcmpeqb ymm0, ymm0, ymm0 + vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff + + align 4 + convertloop: + vpmovzxbd ymm1, qword ptr [eax] + vpmovzxbd ymm2, qword ptr [eax + 8] + lea eax, [eax + 16] + vpslld ymm1, ymm1, 24 + vpslld ymm2, ymm2, 24 + vpblendvb ymm1, ymm1, [edx], ymm0 + vpblendvb ymm2, ymm2, [edx + 32], ymm0 + vmovdqu [edx], ymm1 + 
vmovdqu [edx + 32], ymm2 + lea edx, [edx + 64] + sub ecx, 16 + jg convertloop + + vzeroupper + ret + } +} +#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 + #ifdef HAS_SETROW_X86 // SetRow8 writes 'count' bytes using a 32 bit value repeated. __declspec(naked) __declspec(align(16)) diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 9c646909a..be6c42970 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1936,4 +1936,34 @@ TEST_F(libyuvTest, TestARGBCopyAlpha) { free_aligned_buffer_64(orig_pixels) } +TEST_F(libyuvTest, TestARGBCopyYToAlpha) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_64(orig_pixels, kPixels); + align_buffer_64(dst_pixels_opt, kPixels * 4); + align_buffer_64(dst_pixels_c, kPixels * 4); + + MemRandomize(orig_pixels, kPixels); + MemRandomize(dst_pixels_opt, kPixels * 4); + memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4); + + MaskCpuFlags(0); + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, + dst_pixels_c, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + MaskCpuFlags(-1); + + for (int i = 0; i < benchmark_iterations_; ++i) { + ARGBCopyYToAlpha(orig_pixels, benchmark_width_, + dst_pixels_opt, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + } + for (int i = 0; i < kPixels * 4; ++i) { + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + } + + free_aligned_buffer_64(dst_pixels_c) + free_aligned_buffer_64(dst_pixels_opt) + free_aligned_buffer_64(orig_pixels) +} + } // namespace libyuv