diff --git a/DEPS b/DEPS index eafc459c3..c4c0ac7ec 100644 --- a/DEPS +++ b/DEPS @@ -78,6 +78,22 @@ deps_os = { "third_party/gold": From("chromium_deps", "src/third_party/gold"), }, + "ios": { + # NSS, for SSLClientSocketNSS. + "third_party/nss": + From("chromium_deps", "src/third_party/nss"), + + "net/third_party/nss": + Var("chromium_trunk") + "/src/net/third_party/nss@" + Var("chromium_revision"), + + # class-dump utility to generate header files for undocumented SDKs. + "testing/iossim/third_party/class-dump": + From("chromium_deps", "src/testing/iossim/third_party/class-dump"), + + # Helper for running under the simulator. + "testing/iossim": + Var("chromium_trunk") + "/src/testing/iossim@" + Var("chromium_revision"), + }, } hooks = [ diff --git a/README.chromium b/README.chromium index a3b9484d2..0a0bf7c2b 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 825 +Version: 826 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 322f0ccd7..f2d82cf7f 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -351,8 +351,9 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, int width, int height); // Blur ARGB image. -// Caller should allocate dst_cumsum table of width * height * 16 bytes aligned -// to 16 byte boundary. +// dst_cumsum table of width * height * 16 bytes aligned to 16 byte boundary. +// dst_stride32_cumsum is number of ints in a row (width * 4). +// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. 
LIBYUV_API int ARGBBlur(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 8be9ba146..44c3cffd1 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -70,6 +70,7 @@ extern "C" { #define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOBAYERROW_SSSE3 +#define HAS_ARGBTOBAYERGGROW_SSE2 #define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 @@ -156,7 +157,9 @@ extern "C" { // to __native_client__ to test. #if !defined(LIBYUV_DISABLE_X86) && \ ((defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700) || \ - defined(__native_client__AVX2) || defined(__clang__) || defined(GCC_HAS_AVX2)) + ((defined(__x86_64__) || defined(__i386__)) && \ + (defined(__native_client__AVX2) || defined(__clang__) || \ + defined(GCC_HAS_AVX2)))) // Effects: #define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2 @@ -1398,8 +1401,16 @@ void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix); void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix); +void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer, + uint32 /* selector */, int pix); +void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, + uint32 /* selector */, int pix); void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, uint32 /* selector */, int pix); +void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer, + uint32 /* selector */, int pix); +void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, + uint32 /* selector */, int pix); void I422ToYUY2Row_C(const uint8* src_y, const uint8* src_u, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index aa63e9950..f1808d2d1 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // 
NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 825 +#define LIBYUV_VERSION 826 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 066d165ed..6880de2e2 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1849,6 +1849,15 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, // ARGBToBayer used to select G channel from ARGB. void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix) = ARGBToBayerRow_C; +#if defined(HAS_ARGBTOBAYERGGROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGBToBayerRow = ARGBToBayerGGRow_SSE2; + } + } +#endif #if defined(HAS_ARGBTOBAYERROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { @@ -1857,11 +1866,12 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, ARGBToBayerRow = ARGBToBayerRow_SSSE3; } } -#elif defined(HAS_ARGBTOBAYERROW_NEON) +#endif +#if defined(HAS_ARGBTOBAYERGGROW_NEON) if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToBayerRow = ARGBToBayerRow_Any_NEON; + ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_NEON; + ARGBToBayerRow = ARGBToBayerGGRow_NEON; } } #endif diff --git a/source/row_any.cc b/source/row_any.cc index d04673574..90c6a3ff5 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -199,6 +199,15 @@ BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C, BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C, 7, 4, 1) #endif +#if defined(HAS_ARGBTOBAYERGGROW_SSE2) +BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C, + 7, 4, 1) +#endif +#if defined(HAS_ARGBTOBAYERGGROW_NEON) 
+BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
+         7, 4, 1)
+#endif
+
 #undef BAYERANY
 
 // RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
diff --git a/source/row_common.cc b/source/row_common.cc
index 95d548665..4f3d937a8 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1861,6 +1861,21 @@ void ARGBToBayerRow_C(const uint8* src_argb,
   }
 }
 
+// Copy the G byte of every ARGB pixel to dst_bayer. e.g. GGGGGGGG
+// The selector argument is ignored; pix is the number of pixels to convert.
+void ARGBToBayerGGRow_C(const uint8* src_argb,
+                        uint8* dst_bayer, uint32 /*selector*/, int pix) {
+  // G is byte 1 of each 4 byte ARGB pixel; copy two pixels per pass.
+  for (int left = pix; left > 1; left -= 2) {
+    *dst_bayer++ = src_argb[1];
+    *dst_bayer++ = src_argb[5];
+    src_argb += 8;
+  }
+  if (pix & 1) {  // Odd width: one trailing pixel.
+    *dst_bayer = src_argb[1];
+  }
+}
+
 // Use first 4 shuffler values to reorder ARGB channels.
 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
                       const uint8* shuffler, int pix) {
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 0d84786c9..cd76f68bf 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1193,6 +1193,23 @@ void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
   );
 }
 
+// Copy the G byte of every ARGB pixel, 8 pixels per loop. e.g. GGGGGGGG
+void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 /*selector*/, int pix) {
+  asm volatile (
+  "1:                                          \n"
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // 8 pixels, d1 = G bytes.
+    "subs       %2, %2, #8                     \n"  // pix -= 8, sets flags.
+    "vst1.8     {d1}, [%1]!                    \n"  // write the 8 G bytes.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_bayer),  // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "q0", "q1"  // d0-d3 alias q0-q1.
+  );
+}
+
 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
                          const uint8* shuffler, int pix) {
diff --git a/source/row_posix.cc b/source/row_posix.cc
index e41bdd114..de1eac1cf 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -5721,6 +5721,43 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
 }
 #endif  // HAS_ARGBTOBAYERROW_SSSE3
 
+#ifdef HAS_ARGBTOBAYERGGROW_SSE2
+// Specialized ARGB to Bayer that just isolates G channel. selector is unused.
+// Each loop reads 8 ARGB pixels with aligned loads and writes 8 G bytes.
+void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 selector, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"  // mask 0x000000ff
+    "psrld     $0x18,%%xmm5                    \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrld     $0x8,%%xmm0                     \n"  // Move green to bottom.
+    "psrld     $0x8,%%xmm1                     \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    // packssdw, not packusdw: packusdw is an SSE4.1 instruction and faults on
+    // SSE2-only CPUs. The dwords are masked to 0..255 so results are equal.
+    "packssdw  %%xmm1,%%xmm0                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"  // low 8 bytes = 8 G's.
+    "sub       $0x8,%2                         \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_bayer),  // %1
+    "+r"(pix)         // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBTOBAYERGGROW_SSE2
+
 #ifdef HAS_ARGBSHUFFLEROW_SSSE3
 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
diff --git a/source/row_win.cc b/source/row_win.cc
index 2fb8f3414..1540a7faf 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -6718,6 +6718,37 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
   }
 }
 
+// Specialized ARGB to Bayer that just isolates G channel.
+__declspec(naked) __declspec(align(16))
+void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 selector, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_argb
+    mov        edx, [esp + 8]    // dst_bayer
+                                 // selector (at [esp + 12]) is unused.
+    mov        ecx, [esp + 16]   // pix
+    pcmpeqb    xmm5, xmm5        // generate mask 0x000000ff
+    psrld      xmm5, 24
+
+    align      16
+  wloop:
+    movdqa     xmm0, [eax]       // 8 ARGB pixels via two aligned loads.
+    movdqa     xmm1, [eax + 16]
+    lea        eax, [eax + 32]
+    psrld      xmm0, 8           // Move green to bottom.
+    psrld      xmm1, 8
+    pand       xmm0, xmm5
+    pand       xmm1, xmm5
+    packssdw   xmm0, xmm1        // SSE2; packusdw is SSE4.1. Values 0..255.
+    packuswb   xmm0, xmm1        // Low 8 bytes now hold the 8 G's.
+    sub        ecx, 8
+    movq       qword ptr [edx], xmm0
+    lea        edx, [edx + 8]
+    jg         wloop
+    ret
+  }
+}
+
 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
 __declspec(naked) __declspec(align(16))
 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,