From b8eabfea6487a4be3c1497a7ba7c9e2ab2f5f46d Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 14 Sep 2012 06:59:31 +0000 Subject: [PATCH] RGBAToARGB conversion. SSSE3 optimized. BUG=78 TEST=RGBA unittests Review URL: https://webrtc-codereview.appspot.com/788008 git-svn-id: http://libyuv.googlecode.com/svn/trunk@351 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/convert_argb.h | 5 +++++ include/libyuv/version.h | 2 +- include/libyuv/video_common.h | 11 ++++++---- source/convert_argb.cc | 39 +++++++++++++++++++++++++++++++++++ source/row.h | 3 +++ source/row_common.cc | 16 ++++++++++++++ source/row_posix.cc | 29 ++++++++++++++++++++++++++ source/row_win.cc | 26 +++++++++++++++++++++++ unit_test/planar_test.cc | 2 ++ 10 files changed, 129 insertions(+), 6 deletions(-) diff --git a/README.chromium b/README.chromium index 3080bb1ed..7883f4c9e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 350 +Version: 351 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 47ca947c9..cb41e111c 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -122,6 +122,11 @@ int ABGRToARGB(const uint8* src_frame, int src_stride_frame, uint8* dst_argb, int dst_stride_argb, int width, int height); +// RGBA little endian (abgr in memory) to ARGB +int RGBAToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + // Deprecated function name. #define BG24ToARGB RGB24ToARGB diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 159717c86..ca08dc58e 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 350 +#define LIBYUV_VERSION 351 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h index 49b753284..a3f23b27b 100644 --- a/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h @@ -36,6 +36,7 @@ extern "C" { // http://v4l2spec.bytesex.org/spec/book1.htm // http://developer.apple.com/quicktime/icefloe/dispatch020.html // http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12 +// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt enum FourCC { // Canonical fourcc codes used in our code. @@ -53,9 +54,10 @@ enum FourCC { FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), FOURCC_V210 = FOURCC('V', '2', '1', '0'), FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), - FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), - FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), + FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), + FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), + FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565. FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444. @@ -107,9 +109,10 @@ enum FourCCBpp { FOURCC_BPP_Q420 = 12, FOURCC_BPP_V210 = 22, // 22.5 actually FOURCC_BPP_24BG = 24, - FOURCC_BPP_ABGR = 32, - FOURCC_BPP_BGRA = 32, FOURCC_BPP_ARGB = 32, + FOURCC_BPP_BGRA = 32, + FOURCC_BPP_ABGR = 32, + FOURCC_BPP_RGBA = 32, FOURCC_BPP_RGBP = 16, FOURCC_BPP_RGBO = 16, FOURCC_BPP_R444 = 16, diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 8cc3e4981..04f78b9c3 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -317,6 +317,39 @@ int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra, return 0; } +// Convert RGBA to ARGB. +int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (!src_rgba || !dst_argb || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; + } + void (*RGBAToARGBRow)(const uint8* src_rgba, uint8* dst_argb, int pix) = + RGBAToARGBRow_C; +#if defined(HAS_RGBATOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(width, 4) && + IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + RGBAToARGBRow = RGBAToARGBRow_SSSE3; + } +#endif + + for (int y = 0; y < height; ++y) { + RGBAToARGBRow(src_rgba, dst_argb, width); + src_rgba += src_stride_rgba; + dst_argb += dst_stride_argb; + } + return 0; +} + // Convert RAW to ARGB. int RAWToARGB(const uint8* src_raw, int src_stride_raw, uint8* dst_argb, int dst_stride_argb, @@ -1006,6 +1039,12 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, dst_argb, argb_stride, dst_width, inv_dst_height); break; + case FOURCC_RGBA: + src = sample + (src_width * crop_y + crop_x) * 4; + r = RGBAToARGB(src, src_width * 4, + dst_argb, argb_stride, + dst_width, inv_dst_height); + break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; r = RGB565ToARGB(src, src_width * 2, diff --git a/source/row.h b/source/row.h index 4999651f4..f1b3df2dd 100644 --- a/source/row.h +++ b/source/row.h @@ -37,6 +37,7 @@ extern "C" { #define HAS_ABGRTOARGBROW_SSSE3 #define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 +#define HAS_RGBATOARGBROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBATTENUATEROW_SSSE3 @@ -215,6 +216,7 @@ void ABGRToUVRow_C(const uint8* src_argb0, int src_stride_argb, void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix); +void RGBAToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); @@ -223,6 +225,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix); void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix); +void RGBAToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix); void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); diff --git a/source/row_common.cc b/source/row_common.cc index cd37c4584..de9add459 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -51,6 +51,22 @@ void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int width) { } } +void RGBAToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int width) { + for (int x = 0; x < width; ++x) { + // To support in-place conversion. + uint8 a = src_abgr[0]; + uint8 b = src_abgr[1]; + uint8 g = src_abgr[2]; + uint8 r = src_abgr[3]; + dst_argb[0] = b; + dst_argb[1] = g; + dst_argb[2] = r; + dst_argb[3] = a; + dst_argb += 4; + src_abgr += 4; + } +} + void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { uint8 b = src_rgb24[0]; diff --git a/source/row_posix.cc b/source/row_posix.cc index 609e7dc7a..97feb2a38 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -98,6 +98,11 @@ CONST uvec8 kShuffleMaskBGRAToARGB = { 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u }; +// Shuffle table for converting RGBA to ARGB. +CONST uvec8 kShuffleMaskRGBAToARGB = { + 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u +}; + // Shuffle table for converting ARGB to RGB24. CONST uvec8 kShuffleMaskARGBToRGB24 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u @@ -185,6 +190,30 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { ); } +void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix) { + asm volatile ( + "movdqa %3,%%xmm5 \n" + "sub %0,%1 \n" + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "sub $0x4,%2 \n" + "movdqa %%xmm0,(%0,%1,1) \n" + "lea 0x10(%0),%0 \n" + "jg 1b \n" + + : "+r"(src_rgba), // %0 + "+r"(dst_argb), // %1 + "+r"(pix) // %2 + : "m"(kShuffleMaskRGBAToARGB) // %3 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm5" +#endif + ); +} + void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 diff --git a/source/row_win.cc b/source/row_win.cc index 47a6749d1..5ac8c9cc6 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -88,6 +88,11 @@ static const uvec8 kShuffleMaskBGRAToARGB = { 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u }; +// Shuffle table for converting RGBA to ARGB. +static const uvec8 kShuffleMaskRGBAToARGB = { + 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u +}; + // Shuffle table for converting ARGB to RGB24. static const uvec8 kShuffleMaskARGBToRGB24 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u @@ -168,6 +173,27 @@ __asm { } } +__declspec(naked) __declspec(align(16)) +void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix) { +__asm { + mov eax, [esp + 4] // src_rgba + mov edx, [esp + 8] // dst_argb + mov ecx, [esp + 12] // pix + movdqa xmm5, kShuffleMaskRGBAToARGB + sub edx, eax + + align 16 + convertloop: + movdqa xmm0, [eax] + pshufb xmm0, xmm5 + sub ecx, 4 + movdqa [eax + edx], xmm0 + lea eax, [eax + 16] + jg convertloop + ret + } +} + __declspec(naked) __declspec(align(16)) void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { __asm { diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 89b9d354f..62e0a4461 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -306,6 +306,7 @@ TESTATOB(ARGB, 4, 4, ARGB1555, 2) TESTATOB(ARGB, 4, 4, ARGB4444, 2) TESTATOB(BGRA, 4, 4, ARGB, 4) TESTATOB(ABGR, 4, 4, ARGB, 4) +TESTATOB(RGBA, 4, 4, ARGB, 4) TESTATOB(RAW, 3, 3, ARGB, 4) TESTATOB(RGB24, 3, 3, ARGB, 4) TESTATOB(RGB565, 2, 2, ARGB, 4) @@ -362,6 +363,7 @@ TESTATOBRANDOM(ARGB, 4, 4, ARGB4444, 2) TESTATOBRANDOM(BGRA, 4, 4, ARGB, 4) TESTATOBRANDOM(ABGR, 4, 4, ARGB, 4) +TESTATOBRANDOM(RGBA, 4, 4, ARGB, 4) TESTATOBRANDOM(RAW, 3, 3, ARGB, 4) TESTATOBRANDOM(RGB24, 3, 3, ARGB, 4) TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4)