From 0b9a65b0dcdcc433fa05ea33b476095f246f05b7 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 24 Feb 2012 21:17:17 +0000 Subject: [PATCH] CopyRow_Neon reimplemented with vldm BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/412001 git-svn-id: http://libyuv.googlecode.com/svn/trunk@190 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row.h | 6 +----- source/row_neon.cc | 8 ++++---- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/README.chromium b/README.chromium index f2870a4f3..a6e13d7fb 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 189 +Version: 190 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 1005874e3..14be9f165 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 189 +#define LIBYUV_VERSION 190 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/row.h b/source/row.h index db8bfeb3a..f90eb2473 100644 --- a/source/row.h +++ b/source/row.h @@ -67,16 +67,12 @@ extern "C" { #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #define HAS_MIRRORROW_NEON #define HAS_SPLITUV_NEON +#define HAS_COPYROW_NEON #define HAS_I420TOARGBROW_NEON #define HAS_I420TOBGRAROW_NEON #define HAS_I420TOABGRROW_NEON #endif -// The following are available on Neon platforms -#if defined(__ARM_NEON__) && !defined(__thumb__) && !defined(YUV_DISABLE_ASM) -#define HAS_COPYROW_NEON -#endif - #if defined(_MSC_VER) #define SIMD_ALIGNED(var) __declspec(align(16)) var typedef __declspec(align(16)) signed char vec8[16]; diff --git a/source/row_neon.cc b/source/row_neon.cc index e21b4c054..afa98bdbf 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -187,15 +187,15 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { #endif #if defined(HAS_COPYROW_NEON) -// TODO(fbarchard): Test with and without pld -// "pld [%0, #0xC0] \n" // preload +// TODO(fbarchard): Test without pld on NexusS // Copy multiple of 64 void CopyRow_NEON(const uint8* src, uint8* dst, int count) { asm volatile ( "1: \n" - "vld1.u8 {q0,q1,q2,q3}, [%0]! \n" // load 64 + "pld [%0, #0xC0] \n" // preload + "vldm %0!,{q0,q1,q2,q3} \n" // load 64 "subs %2, %2, #64 \n" // 64 processed per loop - "vst1.u8 {q0,q1,q2,q3}, [%1]! \n" // store 64 + "vstm %1!,{q0,q1,q2,q3} \n" // store 64 "bhi 1b \n" : "+r"(src), // %0 "+r"(dst), // %1