From 1bdcc4c3e3d9207252b57b0b7f99b4113caa7d34 Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com"
Date: Mon, 15 Oct 2012 17:46:59 +0000
Subject: [PATCH] rgb565 and argb1555 neon

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/881004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@420 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium             |  2 +-
 include/libyuv/row.h        |  8 ++++-
 include/libyuv/version.h    |  2 +-
 source/convert_from_argb.cc | 14 +++++++++
 source/row_common.cc        |  4 +++
 source/row_neon.cc          | 61 +++++++++++++++++++++++++++++++++++++
 6 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/README.chromium b/README.chromium
index 269e40409..f09c1c099 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 418
+Version: 420
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index ae3a15726..aa7c2193a 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -150,6 +150,8 @@ extern "C" {
 #define HAS_YUY2TOYROW_NEON
 #define HAS_I422TOYUY2ROW_NEON
 #define HAS_I422TOUYVYROW_NEON
+#define HAS_ARGBTORGB565ROW_NEON
+#define HAS_ARGBTOARGB1555ROW_NEON
 #define HAS_ARGBTOARGB4444ROW_NEON
 #endif
 
@@ -308,8 +310,8 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
 void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix);
 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
 void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
 void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
+void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
 
 void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix);
@@ -337,6 +339,8 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 
 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
@@ -593,6 +597,8 @@ void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
 
 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 
 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 0501c911a..2df08ed56 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 418
+#define LIBYUV_VERSION 420
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index 7c2482964..22c72c9d6 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -245,6 +245,13 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
       ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
     }
   }
+#elif defined(HAS_ARGBTORGB565ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB565Row = ARGBToRGB565Row_NEON;
+    }
+  }
 #endif
 
   for (int y = 0; y < height; ++y) {
@@ -278,6 +285,13 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
       ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
     }
   }
+#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
+    }
+  }
 #endif
 
   for (int y = 0; y < height; ++y) {
diff --git a/source/row_common.cc b/source/row_common.cc
index 5238f0ece..9bd5dfe73 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1073,6 +1073,10 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
 #if defined(HAS_ARGBTORGB24ROW_NEON)
 RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
 RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
+RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
+       7, 4, 2)
+RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
+       7, 4, 2)
 RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
        7, 4, 2)
 #endif
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 2e888cdd5..5b37890a6 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -908,6 +908,67 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
   );
 }
 
+#ifdef HAS_ARGBTORGB565ROW_NEON
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
+  asm volatile (
+    ".p2align 2 \n"
+  "1: \n"
+    "vld4.8 {d0, d1, d2, d3}, [%0]! \n"  // load 8 ARGB pixels: d0=B d1=G d2=R.
+    "subs %2, %2, #8 \n"  // 8 pixels consumed per iteration.
+    "vshr.u8 d0, d0, #3 \n"  // B: keep top 5 bits.
+    "vshr.u8 d1, d1, #2 \n"  // G: keep top 6 bits.
+    "vshr.u8 d2, d2, #3 \n"  // R: keep top 5 bits.
+    "vmovl.u8 q8, d0 \n"  // B widened to 16 bit lanes.
+    "vmovl.u8 q9, d1 \n"  // G widened to 16 bit lanes.
+    "vmovl.u8 q10, d2 \n"  // R widened to 16 bit lanes.
+    "vshl.u16 q9, q9, #5 \n"  // G moved to bits 5..10.
+    "vshl.u16 q10, q10, #11 \n"  // R moved to bits 11..15.
+    "vorr q0, q8, q9 \n"  // BG
+    "vorr q0, q0, q10 \n"  // BGR
+    "vst1.8 {q0}, [%1]! \n"  // store 8 pixels RGB565 (16 bytes).
+    "bgt 1b \n"  // loop while the count left by subs is > 0.
+  : "+r"(src_argb),  // %0
+    "+r"(dst_rgb565),  // %1
+    "+r"(pix)  // %2
+  :
+  : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "q8", "q9", "q10"
+  );
+}
+#endif  // HAS_ARGBTORGB565ROW_NEON
+
+#ifdef HAS_ARGBTOARGB1555ROW_NEON
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
+                            int pix) {
+  asm volatile (
+    ".p2align 2 \n"
+  "1: \n"
+    "vld4.8 {d0, d1, d2, d3}, [%0]! \n"  // load 8 ARGB: d0=B d1=G d2=R d3=A.
+    "subs %2, %2, #8 \n"  // 8 pixels consumed per iteration.
+    "vshr.u8 d0, d0, #3 \n"  // B: keep top 5 bits.
+    "vshr.u8 d1, d1, #3 \n"  // G: keep top 5 bits.
+    "vshr.u8 d2, d2, #3 \n"  // R: keep top 5 bits.
+    "vshr.u8 d3, d3, #7 \n"  // A: keep top bit only.
+    "vmovl.u8 q8, d0 \n"  // B widened to 16 bit lanes.
+    "vmovl.u8 q9, d1 \n"  // G widened to 16 bit lanes.
+    "vmovl.u8 q10, d2 \n"  // R widened to 16 bit lanes.
+    "vmovl.u8 q11, d3 \n"  // A widened to 16 bit lanes.
+    "vshl.u16 q9, q9, #5 \n"  // G moved to bits 5..9.
+    "vshl.u16 q10, q10, #10 \n"  // R moved to bits 10..14.
+    "vshl.u16 q11, q11, #15 \n"  // A moved to bit 15.
+    "vorr q0, q8, q9 \n"  // BG
+    "vorr q1, q10, q11 \n"  // RA
+    "vorr q0, q0, q1 \n"  // BGRA
+    "vst1.8 {q0}, [%1]! \n"  // store 8 pixels ARGB1555 (16 bytes).
+    "bgt 1b \n"  // loop while the count left by subs is > 0.
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb1555),  // %1
+    "+r"(pix)  // %2
+  :
+  : "memory", "cc", "d0", "d1", "d2", "d3", "q8", "q9", "q10", "q11"
+  );
+}
+#endif  // HAS_ARGBTOARGB1555ROW_NEON
+
 #ifdef HAS_ARGBTOARGB4444ROW_NEON
 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
                             int pix) {