From d459f382fd321295a42db481eee9970034409bde Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 14 Aug 2013 17:58:33 +0000 Subject: [PATCH] Align branch targets for NaCL validator. BUG=253 TEST=validator R=johannkoenig@google.com, ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2033004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@758 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/scale.cc | 4 ++++ source/scale_argb.cc | 18 +++++++++++++++++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/README.chromium b/README.chromium index bdcd05bf7..631570605 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 757 +Version: 758 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 598d1a07e..b94dd33e9 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 757 +#define LIBYUV_VERSION 758 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale.cc b/source/scale.cc index 99c528b52..bf473b54b 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -781,6 +781,8 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, paddusw xmm1, xmm3 sub ebp, 1 jg yloop + + align 16 ydone: movdqa [edi], xmm0 movdqa [edi + 16], xmm1 @@ -1358,6 +1360,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, "mov %5,%2 \n" "test %2,%2 \n" "je 3f \n" + ".p2align 4 \n" "2: \n" "movdqa (%0),%%xmm2 \n" "add %6,%0 \n" @@ -1368,6 +1371,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, "paddusw %%xmm3,%%xmm1 \n" "sub $0x1,%2 \n" "jg 2b \n" + ".p2align 4 \n" "3: \n" "movdqa %%xmm0,(%1) \n" "movdqa %%xmm1,0x10(%1) \n" diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 7d87b86c7..a0f23d719 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -236,6 +236,8 @@ static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, lea edi, [edi + 8] sub ecx, 2 // 2 pixels jge xloop2 + + align 16 xloop29: add ecx, 2 - 1 @@ -244,6 +246,8 @@ static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, // 1 pixel remainder movd xmm0, qword ptr [esi + eax * 4] // 1 source x0 pixels movd [edi], xmm0 + + align 16 xloop99: pop edi @@ -313,6 +317,8 @@ static void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, lea edi, [edi + 8] sub ecx, 2 // 2 pixels jge xloop2 + + align 16 xloop29: add ecx, 2 - 1 @@ -328,6 +334,8 @@ static void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, psrlw xmm0, 7 packuswb xmm0, xmm0 // argb 8 bits, 1 pixel. movd [edi], xmm0 + + align 16 xloop99: pop edi @@ -485,6 +493,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, } #define HAS_SCALEARGBCOLS_SSE2 +// TODO(fbarchard): p2align 5 is for nacl branch targets. Reduce using +// pseudoop, bundle or macro. static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) { intptr_t x0 = 0, x1 = 0; @@ -501,7 +511,7 @@ static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, "paddd %%xmm3,%%xmm3 \n" "pextrw $0x3,%%xmm2,%k4 \n" - ".p2align 4 \n" + ".p2align 5 \n" "2: \n" "paddd %%xmm3,%%xmm2 \n" "movd (%1,%3,4),%%xmm0 \n" @@ -514,11 +524,14 @@ static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, "sub $0x2,%2 \n" "jge 2b \n" + ".p2align 5 \n" "29: \n" "add $0x1,%2 \n" "jl 99f \n" "movd (%1,%3,4),%%xmm0 \n" "movd %%xmm0,(%0) \n" + + ".p2align 5 \n" "99: \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1 @@ -593,6 +606,7 @@ static void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, "sub $0x2,%2 \n" "jge 2b \n" + ".p2align 4 \n" "29: \n" "add $0x1,%2 \n" "jl 99f \n" @@ -605,6 +619,8 @@ static void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, "psrlw $0x7,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movd %%xmm0,(%0) \n" + + ".p2align 4 \n" "99: \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1