mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-15 22:59:53 +08:00
Use the "rm" asm constraint for the stride operand in ScaleAddRows_SSE2 so clang on Mac does not run out of registers
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/440001 git-svn-id: http://libyuv.googlecode.com/svn/trunk@204 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
965fb914ea
commit
9198f3754b
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 203
|
Version: 204
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 203
|
#define LIBYUV_VERSION 204
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
|
|||||||
@ -749,7 +749,6 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
static void TransposeWx8_C(const uint8* src, int src_stride,
|
static void TransposeWx8_C(const uint8* src, int src_stride,
|
||||||
uint8* dst, int dst_stride,
|
uint8* dst, int dst_stride,
|
||||||
int w) {
|
int w) {
|
||||||
|
|||||||
@ -198,10 +198,8 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
|||||||
asm volatile (
|
asm volatile (
|
||||||
// compute where to start writing destination
|
// compute where to start writing destination
|
||||||
"add %1, %2 \n"
|
"add %1, %2 \n"
|
||||||
|
|
||||||
// work on segments that are multiples of 16
|
// work on segments that are multiples of 16
|
||||||
"lsrs r3, %2, #4 \n"
|
"lsrs r3, %2, #4 \n"
|
||||||
|
|
||||||
// the output is written in two blocks. 8 bytes followed
|
// the output is written in two blocks. 8 bytes followed
|
||||||
// by another 8. reading is done sequentially, from left to
|
// by another 8. reading is done sequentially, from left to
|
||||||
// right. writing is done from right to left in block sizes
|
// right. writing is done from right to left in block sizes
|
||||||
@ -209,31 +207,26 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
|||||||
// the first of the two blocks. need to subtract that 8 off
|
// the first of the two blocks. need to subtract that 8 off
|
||||||
// along with 16 to get the next location.
|
// along with 16 to get the next location.
|
||||||
"mov r3, #-24 \n"
|
"mov r3, #-24 \n"
|
||||||
|
|
||||||
"beq 2f \n"
|
"beq 2f \n"
|
||||||
|
|
||||||
// back off destination by the size of the register that is
|
// back off destination by the size of the register that is
|
||||||
// going to be mirrord
|
// going to be mirrored
|
||||||
"sub %1, #16 \n"
|
"sub %1, #16 \n"
|
||||||
|
|
||||||
// the loop needs to run on blocks of 16. what will be left
|
// the loop needs to run on blocks of 16. what will be left
|
||||||
// over is either a negative number, the residuals that need
|
// over is either a negative number, the residuals that need
|
||||||
// to be done, or 0. if this isn't subtracted off here the
|
// to be done, or 0. if this isn't subtracted off here the
|
||||||
// loop will run one extra time.
|
// loop will run one extra time.
|
||||||
"sub %2, #16 \n"
|
"sub %2, #16 \n"
|
||||||
|
|
||||||
|
// mirror the bytes in the 64 bit segments. unable to mirror
|
||||||
|
// the bytes in the entire 128 bits in one go.
|
||||||
|
// because of the inability to mirror the entire 128 bits
|
||||||
|
// mirror the writing out of the two 64 bit segments.
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.8 {q0}, [%0]! \n" // src += 16
|
"vld1.8 {q0}, [%0]! \n" // src += 16
|
||||||
|
|
||||||
// mirror the bytes in the 64 bit segments. unable to mirror
|
|
||||||
// the bytes in the entire 128 bits in one go.
|
|
||||||
"vrev64.8 q0, q0 \n"
|
"vrev64.8 q0, q0 \n"
|
||||||
|
|
||||||
// because of the inability to mirror the entire 128 bits
|
|
||||||
// mirror the writing out of the two 64 bit segments.
|
|
||||||
"vst1.8 {d1}, [%1]! \n"
|
"vst1.8 {d1}, [%1]! \n"
|
||||||
"vst1.8 {d0}, [%1], r3 \n" // dst -= 16
|
"vst1.8 {d0}, [%1], r3 \n" // dst -= 16
|
||||||
|
|
||||||
"subs %2, #16 \n"
|
"subs %2, #16 \n"
|
||||||
"bge 1b \n"
|
"bge 1b \n"
|
||||||
|
|
||||||
@ -241,13 +234,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
|||||||
// residuals so jump past
|
// residuals so jump past
|
||||||
"adds %2, #16 \n"
|
"adds %2, #16 \n"
|
||||||
"beq 5f \n"
|
"beq 5f \n"
|
||||||
|
|
||||||
"add %1, #16 \n"
|
"add %1, #16 \n"
|
||||||
|
"2: \n"
|
||||||
"2: \n"
|
|
||||||
|
|
||||||
"mov r3, #-3 \n"
|
"mov r3, #-3 \n"
|
||||||
|
|
||||||
"sub %1, #2 \n"
|
"sub %1, #2 \n"
|
||||||
"subs %2, #2 \n"
|
"subs %2, #2 \n"
|
||||||
// check for 16*n+1 scenarios where segments_of_2 should not
|
// check for 16*n+1 scenarios where segments_of_2 should not
|
||||||
@ -256,24 +245,20 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
|||||||
|
|
||||||
// do this in neon registers as per
|
// do this in neon registers as per
|
||||||
// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
|
// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
|
||||||
"3: \n"
|
"3: \n"
|
||||||
"vld2.8 {d0[0], d1[0]}, [%0]! \n" // src += 2
|
"vld2.8 {d0[0], d1[0]}, [%0]! \n" // src += 2
|
||||||
|
|
||||||
"vst1.8 {d1[0]}, [%1]! \n"
|
"vst1.8 {d1[0]}, [%1]! \n"
|
||||||
"vst1.8 {d0[0]}, [%1], r3 \n" // dst -= 2
|
"vst1.8 {d0[0]}, [%1], r3 \n" // dst -= 2
|
||||||
|
|
||||||
"subs %2, #2 \n"
|
"subs %2, #2 \n"
|
||||||
"bge 3b \n"
|
"bge 3b \n"
|
||||||
|
|
||||||
"adds %2, #2 \n"
|
"adds %2, #2 \n"
|
||||||
"beq 5f \n"
|
"beq 5f \n"
|
||||||
|
"4: \n"
|
||||||
"4: \n"
|
|
||||||
"add %1, #1 \n"
|
"add %1, #1 \n"
|
||||||
"vld1.8 {d0[0]}, [%0] \n"
|
"vld1.8 {d0[0]}, [%0] \n"
|
||||||
"vst1.8 {d0[0]}, [%1] \n"
|
"vst1.8 {d0[0]}, [%1] \n"
|
||||||
|
"5: \n"
|
||||||
"5: \n"
|
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -289,37 +274,29 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
|
|||||||
// compute where to start writing destination
|
// compute where to start writing destination
|
||||||
"add %1, %3 \n" // dst_a + width
|
"add %1, %3 \n" // dst_a + width
|
||||||
"add %2, %3 \n" // dst_b + width
|
"add %2, %3 \n" // dst_b + width
|
||||||
|
|
||||||
// work on input segments that are multiples of 16, but
|
// work on input segments that are multiples of 16, but
|
||||||
// width that has been passed is output segments, half
|
// width that has been passed is output segments, half
|
||||||
// the size of input.
|
// the size of input.
|
||||||
"lsrs r12, %3, #3 \n"
|
"lsrs r12, %3, #3 \n"
|
||||||
|
|
||||||
"beq 2f \n"
|
"beq 2f \n"
|
||||||
|
|
||||||
// the output is written in to two blocks.
|
// the output is written in to two blocks.
|
||||||
"mov r12, #-8 \n"
|
"mov r12, #-8 \n"
|
||||||
|
|
||||||
// back off destination by the size of the register that is
|
// back off destination by the size of the register that is
|
||||||
// going to be mirrored
|
// going to be mirrored
|
||||||
"sub %1, #8 \n"
|
"sub %1, #8 \n"
|
||||||
"sub %2, #8 \n"
|
"sub %2, #8 \n"
|
||||||
|
|
||||||
// the loop needs to run on blocks of 8. what will be left
|
// the loop needs to run on blocks of 8. what will be left
|
||||||
// over is either a negative number, the residuals that need
|
// over is either a negative number, the residuals that need
|
||||||
// to be done, or 0. if this isn't subtracted off here the
|
// to be done, or 0. if this isn't subtracted off here the
|
||||||
// loop will run one extra time.
|
// loop will run one extra time.
|
||||||
"sub %3, #8 \n"
|
"sub %3, #8 \n"
|
||||||
|
|
||||||
|
// mirror the bytes in the 64 bit segments
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld2.8 {d0, d1}, [%0]! \n" // src += 16
|
"vld2.8 {d0, d1}, [%0]! \n" // src += 16
|
||||||
|
|
||||||
// mirror the bytes in the 64 bit segments
|
|
||||||
"vrev64.8 q0, q0 \n"
|
"vrev64.8 q0, q0 \n"
|
||||||
|
|
||||||
"vst1.8 {d0}, [%1], r12 \n" // dst_a -= 8
|
"vst1.8 {d0}, [%1], r12 \n" // dst_a -= 8
|
||||||
"vst1.8 {d1}, [%2], r12 \n" // dst_b -= 8
|
"vst1.8 {d1}, [%2], r12 \n" // dst_b -= 8
|
||||||
|
|
||||||
"subs %3, #8 \n"
|
"subs %3, #8 \n"
|
||||||
"bge 1b \n"
|
"bge 1b \n"
|
||||||
|
|
||||||
@ -327,26 +304,19 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
|
|||||||
// residuals so return
|
// residuals so return
|
||||||
"adds %3, #8 \n"
|
"adds %3, #8 \n"
|
||||||
"beq 4f \n"
|
"beq 4f \n"
|
||||||
|
|
||||||
"add %1, #8 \n"
|
"add %1, #8 \n"
|
||||||
"add %2, #8 \n"
|
"add %2, #8 \n"
|
||||||
|
"2: \n"
|
||||||
"2: \n"
|
|
||||||
|
|
||||||
"mov r12, #-1 \n"
|
"mov r12, #-1 \n"
|
||||||
|
|
||||||
"sub %1, #1 \n"
|
"sub %1, #1 \n"
|
||||||
"sub %2, #1 \n"
|
"sub %2, #1 \n"
|
||||||
|
"3: \n"
|
||||||
"3: \n"
|
|
||||||
"vld2.8 {d0[0], d1[0]}, [%0]! \n" // src += 2
|
"vld2.8 {d0[0], d1[0]}, [%0]! \n" // src += 2
|
||||||
|
|
||||||
"vst1.8 {d0[0]}, [%1], r12 \n" // dst_a -= 1
|
"vst1.8 {d0[0]}, [%1], r12 \n" // dst_a -= 1
|
||||||
"vst1.8 {d1[0]}, [%2], r12 \n" // dst_b -= 1
|
"vst1.8 {d1[0]}, [%2], r12 \n" // dst_b -= 1
|
||||||
|
|
||||||
"subs %3, %3, #1 \n"
|
"subs %3, %3, #1 \n"
|
||||||
"bgt 3b \n"
|
"bgt 3b \n"
|
||||||
"4: \n"
|
"4: \n"
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst_a), // %1
|
"+r"(dst_a), // %1
|
||||||
"+r"(dst_b), // %2
|
"+r"(dst_b), // %2
|
||||||
|
|||||||
@ -1731,7 +1731,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(tmp_height), // %2
|
"+r"(tmp_height), // %2
|
||||||
"+r"(tmp_src), // %3
|
"+r"(tmp_src), // %3
|
||||||
"+r"(tmp_src_stride), // %4
|
"+rm"(tmp_src_stride), // %4
|
||||||
"+rm"(src_width), // %5
|
"+rm"(src_width), // %5
|
||||||
"+rm"(src_height) // %6
|
"+rm"(src_height) // %6
|
||||||
:
|
:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user