From e246e6c18f0606accdd02adce54ac85bba98f947 Mon Sep 17 00:00:00 2001 From: "yang.zhang@arm.com" Date: Tue, 17 Mar 2015 02:22:25 +0000 Subject: [PATCH] Add ARGBToRGB565DitherRow_NEON for ARM32/64 ARM32/64 NEON versions of ARGBToRGB565DitherRow_NEON are implemented. BUG=407 TESTED=libyuvTest.* on ARM32/64 with Android R=fbarchard@google.com Change-Id: Ia689170fb39db964392e5e1113801592ab0628bf Review URL: https://webrtc-codereview.appspot.com/49409004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1335 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- include/libyuv/row.h | 5 +++++ source/convert_from.cc | 8 ++++++++ source/convert_from_argb.cc | 8 ++++++++ source/row_any.cc | 4 ++++ source/row_neon.cc | 24 ++++++++++++++++++++++++ source/row_neon64.cc | 25 +++++++++++++++++++++++++ 6 files changed, 74 insertions(+) diff --git a/include/libyuv/row.h b/include/libyuv/row.h index f88f39f82..bbdbd7f38 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -342,6 +342,7 @@ extern "C" { #define HAS_YUY2TOUV422ROW_NEON #define HAS_YUY2TOUVROW_NEON #define HAS_YUY2TOYROW_NEON +#define HAS_ARGBTORGB565DITHERROW_NEON // Effects: #define HAS_ARGBADDROW_NEON @@ -927,6 +928,8 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, + const uint32 dither4, int width); void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -1422,6 +1425,8 @@ void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); 
void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, + const uint32 dither4, int width); void I444ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, diff --git a/source/convert_from.cc b/source/convert_from.cc index 554ef3622..d1ee22a42 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -1081,6 +1081,14 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2; } } +#endif +#if defined(HAS_ARGBTORGB565DITHERROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON; + } + } #endif { // Allocate a row of argb. diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c27e2118b..e9818d0a1 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -846,6 +846,14 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2; } } +#endif +#if defined(HAS_ARGBTORGB565DITHERROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON; + } + } #endif for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, diff --git a/source/row_any.cc b/source/row_any.cc index 64d6faef2..66b7ad9f9 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -254,6 +254,10 @@ RGBDANY(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, RGBDANY(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, ARGBToRGB565DitherRow_C, 4, 2, 7) #endif +#if defined(HAS_ARGBTORGB565DITHERROW_NEON) +RGBDANY(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, + ARGBToRGB565DitherRow_C, 4, 2, 7) +#endif #undef RGBDANY // ARGB to Bayer does multiple 
of 4 pixels, SSSE3 aligned src, unaligned dst.
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 6a936c382..8ba983e79 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1360,6 +1360,30 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
   );
 }
 
+void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
+                                const uint32 dither4, int width) {
+  asm volatile (  // Per pass: load 8 pixels, add dither, ARGBTORGB565, store.
+    ".p2align   2                              \n"
+    "vdup.32    d2, %3                         \n"  // dither4 in both lanes.
+  "1:                                          \n"
+    MEMACCESS(1)
+    "vld4.8     {d20, d21, d22, d23}, [%1]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vqadd.u8   d20, d20, d2                   \n"  // saturating dither add on
+    "vqadd.u8   d21, d21, d2                   \n"  // the first three byte
+    "vqadd.u8   d22, d22, d2                   \n"  // planes; d23 is not changed.
+    ARGBTORGB565  // macro defined elsewhere; presumably packs into q0.
+    MEMACCESS(0)
+    "vst1.8     {q0}, [%0]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+  : "+r"(dst_rgb),   // %0
+    "+r"(src_argb),  // %1  read-write: advanced by the vld4 writeback.
+    "+r"(width)      // %2  read-write: decremented by subs.
+  : "r"(dither4)     // %3
+  : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"
+  );
+}
+
 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
                             int pix) {
   asm volatile (
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 4ca595286..379835fc7 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -1376,6 +1376,31 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
 }
 #endif  // HAS_ARGBTORGB565ROW_NEON
 
+#ifdef HAS_ARGBTORGB565DITHERROW_NEON
+void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
+                                const uint32 dither4, int width) {
+  asm volatile (  // Per pass: load 8 pixels, add dither, ARGBTORGB565, store.
+    "dup        v1.4s, %w3                     \n"  // dither4 in every lane.
+  "1:                                          \n"
+    MEMACCESS(1)
+    "ld4        {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"  // load 8 pixels
+    "subs       %w2, %w2, #8                   \n"  // 8 processed per loop.
+    "uqadd      v20.8b, v20.8b, v1.8b          \n"  // saturating dither add on
+    "uqadd      v21.8b, v21.8b, v1.8b          \n"  // the first three byte
+    "uqadd      v22.8b, v22.8b, v1.8b          \n"  // planes; v23 is not changed.
+    ARGBTORGB565  // macro defined elsewhere; presumably packs into v0.
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"  // store 8 pixels RGB565.
+    "b.gt       1b                             \n"
+  : "+r"(dst_rgb),   // %0
+    "+r"(src_argb),  // %1  read-write: advanced by the post-indexed ld4.
+    "+r"(width)      // %2  read-write: 32-bit int, so addressed as %w2.
+  : "r"(dither4)     // %3
+  : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"
+  );
+}
+#endif  // HAS_ARGBTORGB565DITHERROW_NEON
+
 #ifdef HAS_ARGBTOARGB1555ROW_NEON
 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
                             int pix) {