From 6d6b7709f754391252dc716b92801cc8ec425642 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Mon, 4 Jun 2012 15:29:15 +0000 Subject: [PATCH] Fix RGB565ToARGB_SSE2. Expose rgb conversions. Low levels were already there for i420 to rgb, but rgb to rgb functions were not exposed. Added unittests. BUG=none TEST=planar unittests Review URL: https://webrtc-codereview.appspot.com/632004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@276 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/convert.h | 3 + include/libyuv/planar_functions.h | 28 +++- include/libyuv/version.h | 2 +- source/planar_functions.cc | 212 +++++++++++++++++++++++++----- source/row_common.cc | 84 ++++++++---- source/row_posix.cc | 2 +- source/row_win.cc | 2 +- unit_test/planar_test.cc | 14 +- 9 files changed, 283 insertions(+), 66 deletions(-) diff --git a/README.chromium b/README.chromium index eb2ceabe8..c725da45f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 275 +Version: 276 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index ee114f079..56b772d8f 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -22,6 +22,9 @@ namespace libyuv { extern "C" { #endif +// Alias +#define I420ToI420 I420Copy + // Copy I420 to I420. int I420Copy(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 4850dc622..1d8c2abb8 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -104,9 +104,10 @@ int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Palindromes. +// Aliases. #define ARGBToBGRA BGRAToARGB #define ARGBToABGR ABGRToARGB +#define ARGBToARGB ARGBCopy // Convert BGRA to ARGB. Also used for ARGB to BGRA. int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra, @@ -128,6 +129,31 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height); +// Convert ARGB To ARGB1555. +int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb1555, int dst_stride_argb1555, + int width, int height); + +// Convert ARGB To ARGB4444. +int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb4444, int dst_stride_argb4444, + int width, int height); + +// Convert RGB565 To ARGB. +int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB1555 To ARGB. +int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB4444 To ARGB. +int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + // Convert ARGB to I400. int ARGBToI400(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 5a7c7dabd..542b8309d 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 275 +#define LIBYUV_VERSION 276 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 51f928358..9239d89cc 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -338,6 +338,34 @@ int I400ToARGB(const uint8* src_y, int src_stride_y, return 0; } +// Convert ARGB to I400. +int ARGBToI400(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + int width, int height) { + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(width, 4) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + } + return 0; +} + int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr, uint8* dst_argb, int dst_stride_argb, int width, int height) { @@ -393,34 +421,6 @@ int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra, return 0; } -// Convert ARGB to I400. -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - IS_ALIGNED(width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - } - return 0; -} - // Convert RAW to ARGB. int RAWToARGB(const uint8* src_raw, int src_stride_raw, uint8* dst_argb, int dst_stride_argb, @@ -475,6 +475,87 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, return 0; } +// Convert RGB565 to ARGB. +int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (height < 0) { + height = -height; + src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; + src_stride_rgb565 = -src_stride_rgb565; + } + void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) = + RGB565ToARGBRow_C; +#if defined(HAS_RGB565TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } +#endif + + for (int y = 0; y < height; ++y) { + RGB565ToARGBRow(src_rgb565, dst_argb, width); + src_rgb565 += src_stride_rgb565; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert ARGB1555 to ARGB. +int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (height < 0) { + height = -height; + src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; + src_stride_argb1555 = -src_stride_argb1555; + } + void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, int pix) = + ARGB1555ToARGBRow_C; +#if defined(HAS_ARGB1555TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; + } +#endif + + for (int y = 0; y < height; ++y) { + ARGB1555ToARGBRow(src_argb1555, dst_argb, width); + src_argb1555 += src_stride_argb1555; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert ARGB4444 to ARGB. +int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + if (height < 0) { + height = -height; + src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; + src_stride_argb4444 = -src_stride_argb4444; + } + void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, int pix) = + ARGB4444ToARGBRow_C; +#if defined(HAS_ARGB4444TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; + } +#endif + + for (int y = 0; y < height; ++y) { + ARGB4444ToARGBRow(src_argb4444, dst_argb, width); + src_argb4444 += src_stride_argb4444; + dst_argb += dst_stride_argb; + } + return 0; +} + // Convert ARGB To RGB24. int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb24, int dst_stride_rgb24, @@ -550,15 +631,14 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, } void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = ARGBToRGB565Row_C; -#if defined(HAS_ARGBTORGB565ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && +#if defined(HAS_ARGBTORGB565ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - if (width * 3 <= kMaxStride) { - ARGBToRGB565Row = ARGBToRGB565Row_Any_SSSE3; + if (width * 2 <= kMaxStride) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; } - if (IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_rgb565, 16) && IS_ALIGNED(dst_stride_rgb565, 16)) { - ARGBToRGB565Row = ARGBToRGB565Row_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToRGB565Row = ARGBToRGB565Row_SSE2; } } #endif @@ -571,6 +651,68 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, return 0; } +// Convert ARGB To ARGB1555. +int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb1555, int dst_stride_argb1555, + int width, int height) { + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = + ARGBToARGB1555Row_C; +#if defined(HAS_ARGBTOARGB1555ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + if (width * 2 <= kMaxStride) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; + } + if (IS_ALIGNED(width, 4)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToARGB1555Row(src_argb, dst_argb1555, width); + src_argb += src_stride_argb; + dst_argb1555 += dst_stride_argb1555; + } + return 0; +} + +// Convert ARGB To ARGB4444. +int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb4444, int dst_stride_argb4444, + int width, int height) { + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = + ARGBToARGB4444Row_C; +#if defined(HAS_ARGBTOARGB4444ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + if (width * 2 <= kMaxStride) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; + } + if (IS_ALIGNED(width, 4)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToARGB4444Row(src_argb, dst_argb4444, width); + src_argb += src_stride_argb; + dst_argb4444 += dst_stride_argb4444; + } + return 0; +} + // Convert NV12 to ARGB. int NV12ToARGB(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, diff --git a/source/row_common.cc b/source/row_common.cc index cfb1dc52d..6acfbbdcc 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -151,37 +151,75 @@ void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { // TODO(fbarchard): support big endian CPU void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - for (int x = 0; x < width; ++x) { - uint8 b = src_argb[0] >> 3; - uint8 g = src_argb[1] >> 2; - uint8 r = src_argb[2] >> 3; - *reinterpret_cast(dst_rgb) = (r << 11) | (g << 5) | b; - dst_rgb += 2; - src_argb += 4; + for (int x = 0; x < width - 1; x += 2) { + uint8 b0 = src_argb[0] >> 3; + uint8 g0 = src_argb[1] >> 2; + uint8 r0 = src_argb[2] >> 3; + uint8 b1 = src_argb[4] >> 3; + uint8 g1 = src_argb[5] >> 2; + uint8 r1 = src_argb[6] >> 3; + *reinterpret_cast(dst_rgb) = b0 | (g0 << 5) | (r0 << 11) | + (b1 << 16) | (g1 << 21) | (r1 << 27); + dst_rgb += 4; + src_argb += 8; + } + if (width & 1) { + uint8 b0 = src_argb[0] >> 3; + uint8 g0 = src_argb[1] >> 2; + uint8 r0 = src_argb[2] >> 3; + *reinterpret_cast(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); } } void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - for (int x = 0; x < width; ++x) { - uint8 b = src_argb[0] >> 3; - uint8 g = src_argb[1] >> 3; - uint8 r = src_argb[2] >> 3; - uint8 a = src_argb[3] >> 7; - *reinterpret_cast(dst_rgb) = (a << 15) | (r << 10) | (g << 5) | b; - dst_rgb += 2; - src_argb += 4; + for (int x = 0; x < width - 1; x += 2) { + uint8 b0 = src_argb[0] >> 3; + uint8 g0 = src_argb[1] >> 3; + uint8 r0 = src_argb[2] >> 3; + uint8 a0 = src_argb[3] >> 7; + uint8 b1 = src_argb[4] >> 3; + uint8 g1 = src_argb[5] >> 3; + uint8 r1 = src_argb[6] >> 3; + uint8 a1 = src_argb[7] >> 7; + *reinterpret_cast(dst_rgb) = + b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | + (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); + dst_rgb += 4; + src_argb += 8; + } + if (width & 1) { + uint8 b0 = src_argb[0] >> 3; + uint8 g0 = src_argb[1] >> 3; + uint8 r0 = src_argb[2] >> 3; + uint8 a0 = src_argb[3] >> 7; + *reinterpret_cast(dst_rgb) = + b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); } } void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - for (int x = 0; x < width; ++x) { - uint8 b = src_argb[0] >> 4; - uint8 g = src_argb[1] >> 4; - uint8 r = src_argb[2] >> 4; - uint8 a = src_argb[3] >> 4; - *reinterpret_cast(dst_rgb) = (a << 12) | (r << 8) | (g << 4) | b; - dst_rgb += 2; - src_argb += 4; + for (int x = 0; x < width - 1; x += 2) { + uint8 b0 = src_argb[0] >> 4; + uint8 g0 = src_argb[1] >> 4; + uint8 r0 = src_argb[2] >> 4; + uint8 a0 = src_argb[3] >> 4; + uint8 b1 = src_argb[4] >> 4; + uint8 g1 = src_argb[5] >> 4; + uint8 r1 = src_argb[6] >> 4; + uint8 a1 = src_argb[7] >> 4; + *reinterpret_cast(dst_rgb) = + b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | + (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); + dst_rgb += 4; + src_argb += 8; + } + if (width & 1) { + uint8 b0 = src_argb[0] >> 4; + uint8 g0 = src_argb[1] >> 4; + uint8 r0 = src_argb[2] >> 4; + uint8 a0 = src_argb[3] >> 4; + *reinterpret_cast(dst_rgb) = + b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); } } diff --git a/source/row_posix.cc b/source/row_posix.cc index 1e61e1fea..a51207de8 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -272,7 +272,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { "mov $0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x20082008,%%eax \n" + "mov $0x20802080,%%eax \n" "movd %%eax,%%xmm6 \n" "pshufd $0x0,%%xmm6,%%xmm6 \n" "pcmpeqb %%xmm3,%%xmm3 \n" diff --git a/source/row_win.cc b/source/row_win.cc index 89eacf9d0..bb36d3802 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -261,7 +261,7 @@ __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits movd xmm5, eax pshufd xmm5, xmm5, 0 - mov eax, 0x20082008 // multiplier shift by 5 and then repeat 6 bits + mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits movd xmm6, eax pshufd xmm6, xmm6, 0 pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 799b1652e..1ad56adf1 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -165,14 +165,22 @@ TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \ free_aligned_buffer_16(dst_rgb_opt) \ } -// TODO(fbarchard): Expose more ARGBToRGB functions and test. +TESTATOB(ARGB, 4, ARGB, 4) TESTATOB(ARGB, 4, BGRA, 4) TESTATOB(ARGB, 4, ABGR, 4) TESTATOB(ARGB, 4, RAW, 3) TESTATOB(ARGB, 4, RGB24, 3) TESTATOB(ARGB, 4, RGB565, 2) -//TESTATOB(ARGB, 4, ARGB1555, 2) -//TESTATOB(ARGB, 4, ARGB4444, 2) +TESTATOB(ARGB, 4, ARGB1555, 2) +TESTATOB(ARGB, 4, ARGB4444, 2) + +TESTATOB(BGRA, 4, ARGB, 4) +TESTATOB(ABGR, 4, ARGB, 4) +TESTATOB(RAW, 3, ARGB, 4) +TESTATOB(RGB24, 3, ARGB, 4) +TESTATOB(RGB565, 2, ARGB, 4) +TESTATOB(ARGB1555, 2, ARGB, 4) +TESTATOB(ARGB4444, 2, ARGB, 4) TESTATOB(YUY2, 2, ARGB, 4) TESTATOB(UYVY, 2, ARGB, 4)