From 14b46c15a21e8009544c5b945b76a8e9189f5f49 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 3 Dec 2014 19:45:20 +0000 Subject: [PATCH] psnr utility aarch64 bit version to fix build error on ios and optimize for 64 bit. BUG=383 TESTED=try bots R=johannkoenig@google.com Review URL: https://webrtc-codereview.appspot.com/30349004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1189 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- util/psnr.cc | 42 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/README.chromium b/README.chromium index 7b759bf48..3cefb12d8 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1185 +Version: 1188 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8ef03888e..13c1e8ae4 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1185 +#define LIBYUV_VERSION 1188 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/util/psnr.cc b/util/psnr.cc index f3cc0cf8d..01f51144f 100644 --- a/util/psnr.cc +++ b/util/psnr.cc @@ -35,7 +35,8 @@ typedef unsigned long long uint64; // NOLINT // libyuv provides this function when linking library for jpeg support. 
#if !defined(HAVE_JPEG)
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+    !defined(__aarch64__)
 #define HAS_SUMSQUAREERROR_NEON
 static uint32 SumSquareError_NEON(const uint8* src_a,
                                   const uint8* src_b, int count) {
@@ -72,6 +73,45 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
     : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
   return sse;
 }
+#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#define HAS_SUMSQUAREERROR_NEON  // AArch64 build of SumSquareError_NEON.
+static uint32 SumSquareError_NEON(const uint8* src_a,  // Sum of (a[i]-b[i])^2.
+                                  const uint8* src_b, int count) {
+  volatile uint32 sse;  // Receives the reduced sum from the final fmov.
+  asm volatile (
+    "eor v16.16b, v16.16b, v16.16b \n"  // Zero the four 4x32-bit accumulators.
+    "eor v18.16b, v18.16b, v18.16b \n"
+    "eor v17.16b, v17.16b, v17.16b \n"
+    "eor v19.16b, v19.16b, v19.16b \n"
+
+    ".p2align 2 \n"
+  "1: \n"  // Loads precede the count test, so 16 bytes are always read.
+    MEMACCESS(0)
+    "ld1 {v0.16b}, [%0], #16 \n"
+    MEMACCESS(1)
+    "ld1 {v1.16b}, [%1], #16 \n"
+    "subs %w2, %w2, #16 \n"  // %w2: count is a 32-bit int, use the w view.
+    "usubl v2.8h, v0.8b, v1.8b \n"  // Widen byte differences to 16 bits.
+    "usubl2 v3.8h, v0.16b, v1.16b \n"
+    "smlal v16.4s, v2.4h, v2.4h \n"  // Accumulate squared differences.
+    "smlal v17.4s, v3.4h, v3.4h \n"
+    "smlal2 v18.4s, v2.8h, v2.8h \n"
+    "smlal2 v19.4s, v3.8h, v3.8h \n"
+    "b.gt 1b \n"  // Another 16 bytes while count > 0.
+
+    "add v16.4s, v16.4s, v17.4s \n"  // Fold the accumulators together.
+    "add v18.4s, v18.4s, v19.4s \n"
+    "add v19.4s, v16.4s, v18.4s \n"
+    "addv s0, v19.4s \n"  // Horizontal add of the four lanes.
+    "fmov %w3, s0 \n"
+    : "+r"(src_a),
+      "+r"(src_b),
+      "+r"(count),
+      "=r"(sse)
+    :
+    : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");  // "memory": asm reads *src_a, *src_b.
+  return sse;
+}
 #elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
 #define HAS_SUMSQUAREERROR_SSE2
 __declspec(naked)