From eeac2903ef22110d475c50ef9bfd7826d3183a5e Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 18 Jul 2012 18:54:32 +0000 Subject: [PATCH] ARGBGrayTo takes source and destination pointers so caller does not need to make a copy of the image. BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/696004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@308 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- include/libyuv/planar_functions.h | 5 +++ source/planar_functions.cc | 39 +++++++++++++++++++--- source/row.h | 8 ++--- source/row_common.cc | 6 ++-- source/row_posix.cc | 26 ++++++++------- source/row_win.cc | 20 ++++++----- unit_test/planar_test.cc | 55 +++++++++++++++++++++++++++++++ 7 files changed, 128 insertions(+), 31 deletions(-) diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 9d5c4c779..ab5e86c18 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -116,6 +116,11 @@ int I420Rect(uint8* dst_y, int dst_stride_y, int ARGBRect(uint8* dst_argb, int dst_stride_argb, int x, int y, int width, int height, uint32 value); +// Convert ARGB to gray scale ARGB. +int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + // Make a rectangle of ARGB gray scale. int ARGBGray(uint8* dst_argb, int dst_stride_argb, int x, int y, int width, int height); diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 69cf2c410..fd5ac81f9 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -802,7 +802,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, ARGBAttenuateRow = ARGBAttenuateRow_SSE2; } #endif -#if defined(HAS_ARGBATTENUATE_SSSE3) +#if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { @@ -832,7 +832,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, } void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) = ARGBUnattenuateRow_C; -#if defined(HAS_ARGBUNATTENUATE_SSE2) +#if defined(HAS_ARGBUNATTENUATEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { @@ -848,6 +848,36 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, return 0; } +// Convert ARGB to Grayed ARGB. +int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, + int width) = ARGBGrayRow_C; +#if defined(HAS_ARGBGRAYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGBGrayRow = ARGBGrayRow_SSSE3; + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBGrayRow(src_argb, dst_argb, width); + src_argb += src_stride_argb; + dst_argb += dst_stride_argb; + } + return 0; +} + // Make a rectangle of ARGB gray scale. int ARGBGray(uint8* dst_argb, int dst_stride_argb, int dst_x, int dst_y, @@ -855,7 +885,8 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb, if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } - void (*ARGBGrayRow)(uint8* dst_argb, int width) = ARGBGrayRow_C; + void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, + int width) = ARGBGrayRow_C; #if defined(HAS_ARGBGRAYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { @@ -864,7 +895,7 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb, #endif uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; for (int y = 0; y < height; ++y) { - ARGBGrayRow(dst, width); + ARGBGrayRow(dst, dst, width); dst += dst_stride_argb; } return 0; diff --git a/source/row.h b/source/row.h index 1e25bcb7a..b67b94331 100644 --- a/source/row.h +++ b/source/row.h @@ -39,7 +39,7 @@ extern "C" { #define HAS_ABGRTOYROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 -#define HAS_ARGBATTENUATE_SSSE3 +#define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 @@ -48,7 +48,7 @@ extern "C" { #define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 -#define HAS_ARGBUNATTENUATE_SSE2 +#define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_BGRATOARGBROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 @@ -489,8 +489,8 @@ extern uint32 fixed_invtbl8[256]; void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_C(uint8* dst_argb, int width); -void ARGBGrayRow_SSSE3(uint8* dst_argb, int width); +void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); void ARGBSepiaRow_C(uint8* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); diff --git a/source/row_common.cc b/source/row_common.cc index 7a431e732..9592555ea 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -279,11 +279,13 @@ static __inline int RGBToGray(uint8 r, uint8 g, uint8 b) { return (( 76 * r + 152 * g + 28 * b) >> 8); } -void ARGBGrayRow_C(uint8* dst_argb, int width) { +void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 y = RGBToGray(dst_argb[2], dst_argb[1], dst_argb[0]); + uint8 y = RGBToGray(src_argb[2], src_argb[1], src_argb[0]); dst_argb[2] = dst_argb[1] = dst_argb[0] = y; + dst_argb[3] = src_argb[3]; dst_argb += 4; + src_argb += 4; } } diff --git a/source/row_posix.cc b/source/row_posix.cc index 0041dcece..c46df674a 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2656,7 +2656,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { } #endif // HAS_ARGBATTENUATE_SSE2 -#ifdef HAS_ARGBATTENUATE_SSSE3 +#ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha CONST uvec8 kShuffleAlpha0 = { 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, @@ -2709,9 +2709,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { #endif ); } -#endif // HAS_ARGBATTENUATE_SSSE3 +#endif // HAS_ARGBATTENUATEROW_SSSE3 -#ifdef HAS_ARGBUNATTENUATE_SSE2 +#ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. // aligned to 16 bytes void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, @@ -2764,7 +2764,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, #endif ); } -#endif // HAS_ARGBUNATTENUATE_SSE2 +#endif // HAS_ARGBUNATTENUATEROW_SSE2 #ifdef HAS_ARGBGRAYROW_SSSE3 // Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R @@ -2773,9 +2773,10 @@ CONST vec8 kARGBToGray = { }; // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels -void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { +void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { asm volatile ( - "movdqa %2,%%xmm4 \n" + "movdqa %3,%%xmm4 \n" + "sub %0,%1 \n" // 8 pixel loop. ".p2align 4 \n" @@ -2799,14 +2800,15 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { "movdqa %%xmm0,%%xmm1 \n" "punpcklwd %%xmm3,%%xmm0 \n" "punpckhwd %%xmm3,%%xmm1 \n" - "sub $0x8,%1 \n" - "movdqa %%xmm0,(%0) \n" - "movdqa %%xmm1,0x10(%0) \n" + "sub $0x8,%2 \n" + "movdqa %%xmm0,(%0,%1,1) \n" + "movdqa %%xmm1,0x10(%0,%1,1) \n" "lea 0x20(%0),%0 \n" "jg 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "m"(kARGBToGray) // %2 + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kARGBToGray) // %3 : "memory", "cc" #if defined(__SSE2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" diff --git a/source/row_win.cc b/source/row_win.cc index 795245204..9135b8071 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2741,7 +2741,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { } #endif // HAS_ARGBATTENUATE_SSE2 -#ifdef HAS_ARGBATTENUATE_SSSE3 +#ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha. static const uvec8 kShuffleAlpha0 = { 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, @@ -2788,9 +2788,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { ret } } -#endif // HAS_ARGBATTENUATE_SSSE3 +#endif // HAS_ARGBATTENUATEROW_SSSE3 -#ifdef HAS_ARGBUNATTENUATE_SSE2 +#ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. // Aligned to 16 bytes. __declspec(naked) __declspec(align(16)) @@ -2843,7 +2843,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, ret } } -#endif // HAS_ARGBUNATTENUATE_SSE2 +#endif // HAS_ARGBUNATTENUATEROW_SSE2 #ifdef HAS_ARGBGRAYROW_SSSE3 // Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R @@ -2853,11 +2853,13 @@ static const vec8 kARGBToGray = { // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. __declspec(naked) __declspec(align(16)) -void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { +void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { __asm { - mov eax, [esp + 4] /* dst_argb */ - mov ecx, [esp + 8] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_argb */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, kARGBToGray + sub edx, eax align 16 convertloop: @@ -2881,8 +2883,8 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { punpcklwd xmm0, xmm3 // GGGA first 4 punpckhwd xmm1, xmm3 // GGGA next 4 sub ecx, 8 - movdqa [eax], xmm0 - movdqa [eax + 16], xmm1 + movdqa [eax + edx], xmm0 + movdqa [eax + edx + 16], xmm1 lea eax, [eax + 32] jg convertloop ret diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 65757c475..4b308f5cf 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -486,6 +486,61 @@ TEST_F(libyuvTest, TestARGBGray) { } } +TEST_F(libyuvTest, TestARGBGrayTo) { + SIMD_ALIGNED(uint8 orig_pixels[256][4]); + SIMD_ALIGNED(uint8 gray_pixels[256][4]); + + // Test blue + orig_pixels[0][0] = 255u; + orig_pixels[0][1] = 0u; + orig_pixels[0][2] = 0u; + orig_pixels[0][3] = 128u; + // Test green + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 255u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 0u; + // Test red + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 255u; + orig_pixels[2][3] = 255u; + // Test color + orig_pixels[3][0] = 16u; + orig_pixels[3][1] = 64u; + orig_pixels[3][2] = 192u; + orig_pixels[3][3] = 224u; + // Do 16 to test asm version. + ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1); + EXPECT_EQ(27u, gray_pixels[0][0]); + EXPECT_EQ(27u, gray_pixels[0][1]); + EXPECT_EQ(27u, gray_pixels[0][2]); + EXPECT_EQ(128u, gray_pixels[0][3]); + EXPECT_EQ(151u, gray_pixels[1][0]); + EXPECT_EQ(151u, gray_pixels[1][1]); + EXPECT_EQ(151u, gray_pixels[1][2]); + EXPECT_EQ(0u, gray_pixels[1][3]); + EXPECT_EQ(75u, gray_pixels[2][0]); + EXPECT_EQ(75u, gray_pixels[2][1]); + EXPECT_EQ(75u, gray_pixels[2][2]); + EXPECT_EQ(255u, gray_pixels[2][3]); + EXPECT_EQ(96u, gray_pixels[3][0]); + EXPECT_EQ(96u, gray_pixels[3][1]); + EXPECT_EQ(96u, gray_pixels[3][2]); + EXPECT_EQ(224u, gray_pixels[3][3]); + + for (int i = 0; i < 256; ++i) { + orig_pixels[i][0] = i; + orig_pixels[i][1] = i / 2; + orig_pixels[i][2] = i / 3; + orig_pixels[i][3] = i; + } + + for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) { + ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 256, 1); + } +} + TEST_F(libyuvTest, TestARGBSepia) { SIMD_ALIGNED(uint8 orig_pixels[256][4]);