From c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Sat, 13 Oct 2012 02:51:48 +0000 Subject: [PATCH] Convert ARGB to ARGB4444 with Neon BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/875004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@415 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/row.h | 4 +++- include/libyuv/version.h | 2 +- source/convert_from_argb.cc | 7 +++++++ source/row_common.cc | 2 ++ source/row_neon.cc | 37 +++++++++++++++++++++++++++++++------ 6 files changed, 45 insertions(+), 9 deletions(-) diff --git a/README.chromium b/README.chromium index 20b08d3b4..b0015d0bc 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 414 +Version: 415 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index b25853c48..d4fb0ab32 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -150,7 +150,7 @@ extern "C" { #define HAS_YUY2TOYROW_NEON #define HAS_I422TOYUY2ROW_NEON #define HAS_I422TOUYVYROW_NEON - +#define HAS_ARGBTOARGB4444ROW_NEON #endif #if defined(_MSC_VER) && !defined(__CLR_VER) @@ -328,6 +328,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -583,6 +584,7 @@ void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_Any_NEON(const uint8* 
src_argb, uint8* dst_rgb, int pix); +void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 11891ad21..21f6dd2e0 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 414 +#define LIBYUV_VERSION 415 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 9dd6b1e02..7c2482964 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -311,6 +311,13 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; } } +#elif defined(HAS_ARGBTOARGB4444ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_NEON; + } + } #endif for (int y = 0; y < height; ++y) { diff --git a/source/row_common.cc b/source/row_common.cc index 2e7b2e4cb..5238f0ece 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1073,6 +1073,8 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, #if defined(HAS_ARGBTORGB24ROW_NEON) RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3) RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3) +RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, + 7, 4, 2) #endif #undef RGBANY diff --git a/source/row_neon.cc b/source/row_neon.cc index 5c5b8150c..cc25dcda6 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -49,15 +49,15 @@ extern "C" { "vsub.s16 q0, q0, q15 \n"/* offset y */\ "vmul.s16 q0, q0, q14 \n" \ "vadd.s16 d18, 
d19 \n" \ - "vqadd.s16 d20, d0, d16 \n" \ + "vqadd.s16 d20, d0, d16 \n" /* B */ \ "vqadd.s16 d21, d1, d16 \n" \ - "vqadd.s16 d22, d0, d17 \n" \ + "vqadd.s16 d22, d0, d17 \n" /* R */ \ "vqadd.s16 d23, d1, d17 \n" \ - "vqadd.s16 d16, d0, d18 \n" \ + "vqadd.s16 d16, d0, d18 \n" /* G */ \ "vqadd.s16 d17, d1, d18 \n" \ - "vqrshrun.s16 d0, q10, #6 \n" \ - "vqrshrun.s16 d1, q11, #6 \n" \ - "vqrshrun.s16 d2, q8, #6 \n" \ + "vqrshrun.s16 d0, q10, #6 \n" /* B */ \ + "vqrshrun.s16 d1, q11, #6 \n" /* R */ \ + "vqrshrun.s16 d2, q8, #6 \n" /* G */ \ "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\ "vmovl.u8 q11, d1 \n" \ "vmovl.u8 q8, d2 \n" \ @@ -908,6 +908,31 @@ void I422ToUYVYRow_NEON(const uint8* src_y, ); } +#ifdef HAS_ARGBTOARGB4444ROW_NEON +void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, int pix) { + asm volatile ( + "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vshr.u8 d0, d0, #4 \n" // B + "vbic.32 d1, d1, d4 \n" // G + "vshr.u8 d2, d2, #4 \n" // R + "vbic.32 d3, d3, d4 \n" // A + "vorr d0, d0, d1 \n" // BG + "vorr d1, d2, d3 \n" // RA + "vzip.u8 d0, d1 \n" // BGRA weaved together. + "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb4444), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4" // Clobber List + ); +} +#endif // HAS_ARGBTOARGB4444ROW_NEON #endif // __ARM_NEON__ #ifdef __cplusplus