diff --git a/README.chromium b/README.chromium index 06c1c70d1..6a7da8db4 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 845 +Version: 846 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 6b0dae855..22650f2ad 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -355,6 +355,7 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, // dst_cumsum table of width * height * 16 bytes aligned to 16 byte boundary. // dst_stride32_cumsum is number of ints in a row (width * 4). // radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. +// Blur is optimized for radius of 5 (11x11) or less. LIBYUV_API int ARGBBlur(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 436574664..ad6b5253d 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 845 +#define LIBYUV_VERSION 846 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/scale_argb.cc b/source/scale_argb.cc index f224761fb..21ed8bcb9 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -371,7 +371,6 @@ static void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -#define HAS_SCALEARGBCOLSUP2_SSE2 __declspec(naked) __declspec(align(16)) void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, int dst_width, int /* x */, int /* dx */) { @@ -675,6 +674,39 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, ); } +// Reads 4 pixels, duplicates them and writes 8 pixels. +// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. +void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, + int dst_width, int /* x */, int /* dx */) { + asm volatile ( + ".p2align 4 \n" + BUNDLEALIGN + "1: \n" + "movdqa " MEMACCESS(1) ",%%xmm0 \n" + "lea " MEMLEA(0x10,1) ",%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpckldq %%xmm0,%%xmm0 \n" + "punpckhdq %%xmm1,%%xmm1 \n" + "sub $0x8,%2 \n" + "movdqa %%xmm0," MEMACCESS(0) " \n" + "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n" + "lea " MEMLEA(0x20,0) ",%0 \n" + "jg 1b \n" + + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc" +#if defined(__native_client__) && defined(__x86_64__) + , "r14" +#endif +#if defined(__SSE2__) + , "xmm0", "xmm1" +#endif + ); +} + // Shuffle table for arranging 2 pixels into pairs for pmaddubsw static uvec8 kShuffleColARGB = { 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel @@ -1363,14 +1395,12 @@ static void ScaleARGBSimple(int src_width, int src_height, #if defined(HAS_SCALEARGBCOLS_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBCols = ScaleARGBCols_SSE2; -#if defined(HAS_SCALEARGBCOLS_SSE2) if (src_width * 2 == dst_width && IS_ALIGNED(dst_width, 8) && (x >> 16) == 0 && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { ScaleARGBCols = ScaleARGBColsUp2_SSE2; } -#endif } #endif