diff --git a/README.chromium b/README.chromium index 67e4b9391..5ae35d2ea 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 472 +Version: 473 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index b7bb7dafe..b84a1aece 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -202,6 +202,8 @@ extern "C" { #define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB4444TOARGBROW_NEON #define HAS_RGB565TOYROW_NEON +#define HAS_ARGB1555TOYROW_NEON +#define HAS_ARGB4444TOYROW_NEON #endif // The following are available on Mips platforms @@ -346,6 +348,8 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); +void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); @@ -353,6 +357,8 @@ void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix); void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix); +void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix); void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); @@ -366,6 +372,8 @@ void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); +void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index d11d47ed4..07a3a7562 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 472 +#define LIBYUV_VERSION 473 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index 5efefc3a3..e2d4870f3 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -1453,12 +1453,13 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, return 0; } +// Convert ARGB1555 to I420. LIBYUV_API int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || width * 4 > kMaxStride) { @@ -1471,43 +1472,71 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, src_stride_argb1555 = -src_stride_argb1555; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - ARGB1555ToARGBRow = ARGB1555ToARGBRow_C; + void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + ARGB1555ToARGBRow_C; #if defined(HAS_ARGB1555TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - TestReadSafe(src_argb1555, src_stride_argb1555, width, height, 2, 16)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; + if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; + } + } +#elif defined(HAS_ARGB1555TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_ARGB1555TOYROW_NEON) + void (*ARGB1555ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGB1555ToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGB1555ToYRow = ARGB1555ToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_ARGB1555TOYROW_NEON for (int y = 0; y < height - 1; y += 2) { ARGB1555ToARGBRow(src_argb1555, row, width); - ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, - row + kMaxStride, width); + ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_ARGB1555TOYROW_NEON) + ARGB1555ToYRow(src_argb1555, dst_y, width); + ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_argb1555 += src_stride_argb1555 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -1516,11 +1545,16 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, if (height & 1) { ARGB1555ToARGBRow_C(src_argb1555, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_ARGB1555TOYROW_NEON) + ARGB1555ToYRow(src_argb1555, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } +// Convert ARGB4444 to I420. LIBYUV_API int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, uint8* dst_y, int dst_stride_y, @@ -1539,43 +1573,71 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, src_stride_argb4444 = -src_stride_argb4444; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - ARGB4444ToARGBRow = ARGB4444ToARGBRow_C; + void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + ARGB4444ToARGBRow_C; #if defined(HAS_ARGB4444TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - TestReadSafe(src_argb4444, src_stride_argb4444, width, height, 2, 16)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; + if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; + } + } +#elif defined(HAS_ARGB4444TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_ARGB4444TOYROW_NEON) + void (*ARGB4444ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGB4444ToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGB4444ToYRow = ARGB4444ToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_ARGB4444TOYROW_NEON for (int y = 0; y < height - 1; y += 2) { ARGB4444ToARGBRow(src_argb4444, row, width); - ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, - row + kMaxStride, width); + ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_ARGB4444TOYROW_NEON) + ARGB4444ToYRow(src_argb4444, dst_y, width); + ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_argb4444 += src_stride_argb4444 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -1584,7 +1646,11 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, if (height & 1) { ARGB4444ToARGBRow_C(src_argb4444, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_ARGB4444TOYROW_NEON) + ARGB4444ToYRow(src_argb4444, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } diff --git a/source/row_any.cc b/source/row_any.cc index 45330e510..a048101cd 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -195,6 +195,8 @@ YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8) YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8) YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8) YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8) +YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8) +YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8) YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16) YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16) YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8) diff --git a/source/row_common.cc b/source/row_common.cc index aad97b001..b20aa522f 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -321,6 +321,34 @@ void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { } } +void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { + for (int x = 0; x < width; ++x) { + uint8 b = src_argb1555[0] & 0x1f; + uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8 r = (src_argb1555[1] & 0x7c) >> 2; + b = (b << 3) | (b >> 2); + g = (g << 3) | (g >> 2); + r = (r << 3) | (r >> 2); + dst_y[0] = RGBToY(r, g, b); + src_argb1555 += 2; + dst_y += 1; + } +} + +void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { + for (int x = 0; x < width; ++x) { + uint8 b = src_argb4444[0] & 0x0f; + uint8 g = src_argb4444[0] >> 4; + uint8 r = src_argb4444[1] & 0x0f; + b = (b << 4) | b; + g = (g << 4) | g; + r = (r << 4) | r; + dst_y[0] = RGBToY(r, g, b); + src_argb4444 += 2; + dst_y += 1; + } +} + void ARGBToUV444Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width) { for (int x = 0; x < width; ++x) { diff --git a/source/row_neon.cc b/source/row_neon.cc index 8851d2991..39f122766 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1554,7 +1554,7 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { "vmov.u8 d27, #16 \n" // Add 16 constant ".p2align 2 \n" "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q2, d0, d24 \n" // B "vmlal.u8 q2, d1, d25 \n" // G @@ -1582,7 +1582,7 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { "vmov.u8 d27, #16 \n" // Add 16 constant ".p2align 2 \n" "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565. + "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. "subs %2, %2, #8 \n" // 8 processed per loop. RGB565TOARGB "vmull.u8 q2, d0, d24 \n" // B @@ -1601,6 +1601,61 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { } #endif // HAS_RGB565TOYROW_NEON +#ifdef HAS_ARGB1555TOYROW_NEON +void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGB1555TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q12", "q13" + ); +} +#endif // HAS_ARGB1555TOYROW_NEON + +#ifdef HAS_ARGB4444TOYROW_NEON +void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGB4444TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q12", "q13" + ); +} +#endif // HAS_ARGB4444TOYROW_NEON #ifdef HAS_BGRATOYROW_NEON void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {