diff --git a/Android.bp b/Android.bp index 7d95a7865..fc6a81fc6 100644 --- a/Android.bp +++ b/Android.bp @@ -43,7 +43,6 @@ cc_library { "source/scale_neon.cc", "source/scale_neon64.cc", "source/video_common.cc", - "source/convert_jpeg.cc", "source/mjpeg_decoder.cc", "source/mjpeg_validate.cc", diff --git a/README.chromium b/README.chromium index c25373e23..a118c1330 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1703 +Version: 1704 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 257cc76c6..0f320c718 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -277,6 +277,7 @@ extern "C" { #define HAS_ARGBTOAR30ROW_AVX2 #define HAS_ARGBTORAWROW_AVX2 #define HAS_ARGBTORGB24ROW_AVX2 +#define HAS_ARGBTORGB24ROW_AVX512VBMI #define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2 @@ -1706,6 +1707,8 @@ void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); + void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, @@ -2497,12 +2500,13 @@ void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, diff --git 
a/include/libyuv/version.h b/include/libyuv/version.h index 21522cf35..94d9ea4dc 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1703 +#define LIBYUV_VERSION 1704 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 37adb5e73..2a36d6d83 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -887,6 +887,14 @@ int ARGBToRGB24(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) + if (TestCpuFlag(kCpuHasAVX512VBMI)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; + } + } +#endif #if defined(HAS_ARGBTORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; diff --git a/source/row_any.cc b/source/row_any.cc index 5040e3224..0983e2a1a 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -447,6 +447,9 @@ ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) #if defined(HAS_ARGBTORGB24ROW_AVX2) ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31) #endif +#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) +ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31) +#endif #if defined(HAS_ARGBTORAWROW_AVX2) ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31) #endif diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 640342395..13f4719c0 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -552,6 +552,54 @@ void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { "xmm7"); } +// TODO(fbarchard): Detect whether the compiler can do avx512 and add ifdefs + +// Shuffle table for converting ARGBToRGB24 +static const ulvec8 kPermARGBToRGB24_0 = { + 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, + 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, 25u, 26u, 28u, + 29u, 30u, 32u, 
33u, 34u, 36u, 37u, 38u, 40u, 41u}; +static const ulvec8 kPermARGBToRGB24_1 = { + 10u, 12u, 13u, 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, + 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, + 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, 50u, 52u}; +static const ulvec8 kPermARGBToRGB24_2 = { + 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, + 36u, 37u, 38u, 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, + 50u, 52u, 53u, 54u, 56u, 57u, 58u, 60u, 61u, 62u}; + +void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vmovdqa %3,%%ymm5 \n" + "vmovdqa %4,%%ymm6 \n" + "vmovdqa %5,%%ymm7 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vpermt2b %%ymm1,%%ymm5,%%ymm0 \n" + "vpermt2b %%ymm2,%%ymm6,%%ymm1 \n" + "vpermt2b %%ymm3,%%ymm7,%%ymm2 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "vmovdqu %%ymm2,0x40(%1) \n" + "lea 0x60(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kPermARGBToRGB24_0), // %3 + "m"(kPermARGBToRGB24_1), // %4 + "m"(kPermARGBToRGB24_2) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6", "xmm7"); +} + void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vbroadcastf128 %3,%%ymm6 \n" @@ -587,11 +635,11 @@ void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskARGBToRAW), // %3 - "m"(kPermdRGB24_AVX) // %4 + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRAW), // %3 + "m"(kPermdRGB24_AVX) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); }