diff --git a/README.chromium b/README.chromium index a8ba7d129..25e0a1f38 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1737 +Version: 1738 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index e8ed1f59f..33c783b5d 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -576,6 +576,15 @@ int RAWToARGB(const uint8_t* src_raw, int width, int height); +// RGB big endian (rgb in memory) to RGBA. +LIBYUV_API +int RAWToRGBA(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); + // RGB16 (RGBP fourcc) little endian to ARGB. LIBYUV_API int RGB565ToARGB(const uint8_t* src_rgb565, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index fe2133946..ba55c0258 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -274,6 +274,7 @@ extern "C" { #define HAS_I210TOARGBROW_SSSE3 #define HAS_I422TOAR30ROW_SSSE3 #define HAS_MERGERGBROW_SSSE3 +#define HAS_RAWTORGBAROW_SSSE3 #define HAS_RGBATOYJROW_SSSE3 #define HAS_SPLITRGBROW_SSSE3 #define HAS_SWAPUVROW_SSSE3 @@ -369,6 +370,7 @@ extern "C" { #define HAS_NV21TORGB24ROW_NEON #define HAS_NV21TOYUV24ROW_NEON #define HAS_RAWTOARGBROW_NEON +#define HAS_RAWTORGBAROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTOUVROW_NEON #define HAS_RAWTOYROW_NEON @@ -1941,6 +1943,7 @@ void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); @@ -1961,6 +1964,7 @@ void RGB24ToARGBRow_NEON(const 
uint8_t* src_rgb24, void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); @@ -1995,6 +1999,7 @@ void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444, int width); void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, @@ -2014,6 +2019,9 @@ void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGBARow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -2047,6 +2055,7 @@ void RGB24ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGBARow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 
b41f1b5f0..ad9a784c4 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1737 +#define LIBYUV_VERSION 1738 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 38011d115..f46b4309d 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -1349,6 +1349,57 @@ int RAWToARGB(const uint8_t* src_raw, return 0; } +// Convert RAW to RGBA. +LIBYUV_API +int RAWToRGBA(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height) { + int y; + void (*RAWToRGBARow)(const uint8_t* src_rgb, uint8_t* dst_rgba, int width) = + RAWToRGBARow_C; + if (!src_raw || !dst_rgba || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + // Coalesce rows. + if (src_stride_raw == width * 3 && dst_stride_rgba == width * 4) { + width *= height; + height = 1; + src_stride_raw = dst_stride_rgba = 0; + } +#if defined(HAS_RAWTORGBAROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToRGBARow = RAWToRGBARow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToRGBARow = RAWToRGBARow_SSSE3; + } + } +#endif +#if defined(HAS_RAWTORGBAROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToRGBARow = RAWToRGBARow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToRGBARow = RAWToRGBARow_NEON; + } + } +#endif + + for (y = 0; y < height; ++y) { + RAWToRGBARow(src_raw, dst_rgba, width); + src_raw += src_stride_raw; + dst_rgba += dst_stride_rgba; + } + return 0; +} + // Convert RGB565 to ARGB. 
LIBYUV_API int RGB565ToARGB(const uint8_t* src_rgb565, diff --git a/source/row_any.cc b/source/row_any.cc index a45c90b5d..9b29b2bfb 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -559,6 +559,9 @@ ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) #endif +#if defined(HAS_RAWTORGBAROW_SSSE3) +ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15) +#endif #if defined(HAS_RAWTORGB24ROW_SSSE3) ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7) #endif @@ -773,6 +776,9 @@ ANY11(RGB24ToARGBRow_Any_MMI, RGB24ToARGBRow_MMI, 0, 3, 4, 3) #ifdef HAS_RAWTOARGBROW_NEON ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) #endif +#ifdef HAS_RAWTORGBAROW_NEON +ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7) +#endif #ifdef HAS_RAWTOARGBROW_MSA ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) #endif diff --git a/source/row_common.cc b/source/row_common.cc index 306e9d433..a35c7cc3f 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -123,6 +123,21 @@ void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) { } } +void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t r = src_raw[0]; + uint8_t g = src_raw[1]; + uint8_t b = src_raw[2]; + dst_rgba[0] = 255u; + dst_rgba[1] = b; + dst_rgba[2] = g; + dst_rgba[3] = r; + dst_rgba += 4; + src_raw += 3; + } +} + void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { int x; for (x = 0; x < width; ++x) { diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 274cc9d82..fa7b8cb31 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -98,6 +98,10 @@ static const uvec8 kShuffleMaskRGB24ToARGB = { static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 
15u}; +// Shuffle table for converting RAW to RGBA. +static const uvec8 kShuffleMaskRAWToRGBA = {12u, 2u, 1u, 0u, 13u, 5u, 4u, 3u, + 14u, 8u, 7u, 6u, 15u, 11u, 10u, 9u}; + // Shuffle table for converting RAW to RGB24. First 8. static const uvec8 kShuffleMaskRAWToRGB24_0 = { 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, @@ -260,6 +264,45 @@ void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) { : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } +// Same code as RAWToARGB with different shuffler and A in low bits +void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" // 0x000000ff + "psrld $0x18,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm3 \n" + "lea 0x30(%0),%0 \n" + "movdqa %%xmm3,%%xmm2 \n" + "palignr $0x8,%%xmm1,%%xmm2 \n" + "pshufb %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm2 \n" + "palignr $0xc,%%xmm0,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu %%xmm2,0x20(%1) \n" + "por %%xmm5,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "palignr $0x4,%%xmm3,%%xmm3 \n" + "pshufb %%xmm4,%%xmm3 \n" + "movdqu %%xmm1,0x10(%1) \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm3,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgba), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRAWToRGBA) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); +} + void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { diff --git a/source/row_neon.cc b/source/row_neon.cc index aa5bbc42a..1cf8eefea 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -781,6 +781,22 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { ); } +void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { + asm volatile( + "vmov.u8 d0, 
#255 \n" // Alpha + "1: \n" + "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vswp.u8 d1, d3 \n" // swap R, B + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of RGBA. + "bgt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgba), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List + ); +} void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 45d93b298..866e7bfc6 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -821,6 +821,24 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { ); } +void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { + asm volatile( + "movi v0.8b, #255 \n" // Alpha + "1: \n" + "ld3 {v3.8b,v4.8b,v5.8b}, [%0], #24 \n" // read r g b + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "orr v2.8b, v4.8b, v4.8b \n" // move g + "orr v1.8b, v5.8b, v5.8b \n" // move b + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store a b g r + "b.gt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgba), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List + ); +} + void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 987dbce8a..2c8eae509 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1232,6 +1232,7 @@ TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0) TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)