diff --git a/README.chromium b/README.chromium index 9e3a46c71..c460a7b3f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1593 +Version: 1594 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 1fc63dcfc..72a751740 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1593 +#define LIBYUV_VERSION 1594 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/libyuv.gyp b/libyuv.gyp index ed7c40c7c..db4b54904 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -61,6 +61,7 @@ '-mfpu=vfp', '-mfpu=vfpv3', '-mfpu=vfpv3-d16', + # '-mthumb', # arm32 not thumb ], 'conditions': [ # Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug. @@ -74,6 +75,7 @@ ['target_arch != "arm64"', { 'cflags': [ '-mfpu=neon', + # '-marm', # arm32 not thumb ], }], ], diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b1b1f2c83..237ab6831 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2404,8 +2404,8 @@ int ARGBExtractAlpha(const uint8* src_argb, int src_stride, #endif #if defined(HAS_ARGBEXTRACTALPHAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_NEON - : ARGBExtractAlphaRow_Any_NEON; + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON + : ARGBExtractAlphaRow_Any_NEON; } #endif diff --git a/source/row_any.cc b/source/row_any.cc index 94ae0edd7..494164fd0 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -470,7 +470,7 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) #endif #ifdef HAS_ARGBEXTRACTALPHAROW_NEON -ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7) +ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) #endif #undef ANY11 diff --git a/source/row_neon.cc b/source/row_neon.cc index 7574cee85..9e60237ec 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1302,16 +1302,17 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { asm volatile ( "1: \n" MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels - "subs %2, %2, #8 \n" // 8 processed per loop + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels + "subs %2, %2, #16 \n" // 16 processed per loop MEMACCESS(1) - "vst1.8 {d3}, [%1]! \n" // store 8 A's. + "vst1.8 {q3}, [%1]! \n" // store 16 A's. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_a), // %1 "+r"(width) // %2 : - : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List + : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List ); } diff --git a/source/row_neon64.cc b/source/row_neon64.cc index e5f2dc8f3..80e1515b2 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -450,7 +450,6 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { - int64 width64 = (int64)(width); asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" @@ -463,7 +462,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(dst_argb), // %1 - "+r"(width64) // %2 + "+r"(width) // %2 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), [kUVToG]"r"(&kYuvI601Constants.kUVToG), [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), @@ -1404,10 +1403,10 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { asm volatile ( "1: \n" MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels + "subs %w2, %w2, #16 \n" // 16 processed per loop MEMACCESS(1) - "st1 {v3.8b}, [%1], #8 \n" // store 8 A's. + "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_a), // %1