From 0d3bfab6db10762ab162b1f1cd80cd9ca3f81916 Mon Sep 17 00:00:00 2001 From: "yang.zhang@arm.com" Date: Tue, 24 Mar 2015 08:02:30 +0000 Subject: [PATCH] Add nacl macros to ScaleFilterCols_NEON on ARM32/64 platform Add the nacl macros to ARM functions. Without them, a bunch of code fails to validate. BUG=319 TESTED=libyuvTest.* on ARM32/64 with Android R=fbarchard@google.com Change-Id: I7a36434f18e0de8b8f8a9fe01167bfe50cff8962 Review URL: https://webrtc-codereview.appspot.com/47739004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1343 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- source/scale_neon.cc | 25 +++++++++++++++---------- source/scale_neon64.cc | 25 +++++++++++++++---------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/source/scale_neon.cc b/source/scale_neon.cc index 43b8b5aee..25098fc14 100644 --- a/source/scale_neon.cc +++ b/source/scale_neon.cc @@ -579,13 +579,16 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // the x/dx stepping #define LOAD2_DATA8_LANE(n) \ "lsr %5, %3, #16 \n" \ - "add r12, %1, %5 \n" \ + "add %6, %1, %5 \n" \ "add %3, %3, %4 \n" \ - "vld2.8 {d6["#n"], d7["#n"]}, [r12] \n" + MEMACCESS(6) \ + "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n" void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { - int tmp[4] = {0, 1, 2, 3}; + int dx_offset[4] = {0, 1, 2, 3}; + int *tmp = dx_offset; + const uint8* src_tmp = src_ptr; asm volatile ( ".p2align 2 \n" "vdup.32 q0, %3 \n" // x @@ -629,13 +632,15 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "vadd.s32 q2, q2, q0 \n" "subs %2, %2, #8 \n" // 8 processed per loop "bgt 1b \n" - : "+r"(dst_ptr) // %0 - : "r"(src_ptr), // %1 - "r"(dst_width), // %2 - "r"(x), // %3 - "r"(dx), // %4 - "r"(tmp) // %5 - : "memory", "cc", "r12", "q0", "q1", "q2", "q3", + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(x), // %3 + "+r"(dx), // %4 + "+r"(tmp), // %5 + "+r"(src_tmp) // %6 + : + : 
"memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13" ); } diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index faa859f7d..9f1cac490 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -582,13 +582,16 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // the x/dx stepping #define LOAD2_DATA8_LANE(n) \ "lsr %5, %3, #16 \n" \ - "add x12, %1, %5 \n" \ + "add %6, %1, %5 \n" \ "add %3, %3, %4 \n" \ - "ld2 {v4.b, v5.b}["#n"], [x12] \n" + MEMACCESS(6) \ + "ld2 {v4.b, v5.b}["#n"], [%6] \n" void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { - int tmp[4] = {0, 1, 2, 3}; + int dx_offset[4] = {0, 1, 2, 3}; + int *tmp = dx_offset; + const uint8* src_tmp = src_ptr; asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx @@ -631,13 +634,15 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "add v2.4s, v2.4s, v0.4s \n" "subs %2, %2, #8 \n" // 8 processed per loop "b.gt 1b \n" - : "+r"(dst_ptr) // %0 - : "r"(src_ptr), // %1 - "r"(dst_width), // %2 - "r"(static_cast(x)), // %3 - "r"(static_cast(dx)), // %4 - "r"(tmp) // %5 - : "memory", "cc", "x12", "v0", "v1", "v2", "v3", + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(x), // %3 + "+r"(dx), // %4 + "+r"(tmp), // %5 + "+r"(src_tmp) // %6 + : + : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17" ); }