From cb35d5f90e2a74c502f343abc6beae6440d14f10 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Thu, 13 Oct 2022 16:20:08 -0700 Subject: [PATCH] BGRAToI420 use SSSE3 for Y but C for UV when LIBYUV_BIT_EXACT enabled - Previously was C for both Y and UV. Was BGRAToI420_Opt (17780 ms) Now BGRAToI420_Opt (9546 ms) Bug: b/253491233 Change-Id: Id103d8d5ba0fed0f7a427dd5955e1830275eff6b Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3953131 Reviewed-by: Wan-Teh Chang --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/convert.cc | 58 ++++++++++++++++++++++++++++------------ 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/README.chromium b/README.chromium index d784f6618..cf50831ea 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1845 +Version: 1846 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 43d0c1d88..6d52b22a5 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1845 +#define LIBYUV_VERSION 1846 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 8b745e7d9..ad0edd1f2 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -1647,16 +1647,6 @@ int BGRAToI420(const uint8_t* src_bgra, src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } -#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - BGRAToYRow = BGRAToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_SSSE3; - BGRAToYRow = BGRAToYRow_SSSE3; - } - } -#endif #if defined(HAS_BGRATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { BGRAToYRow = BGRAToYRow_Any_NEON; @@ -1673,23 +1663,57 @@ int BGRAToI420(const uint8_t* src_bgra, } } #endif +#if defined(HAS_BGRATOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + BGRAToYRow = BGRAToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_SSSE3; + } + } +#endif +#if defined(HAS_BGRATOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_BGRATOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + BGRAToYRow = BGRAToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + BGRAToYRow = BGRAToYRow_AVX2; + } + } +#endif +#if defined(HAS_BGRATOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + BGRAToUVRow = BGRAToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + BGRAToUVRow = BGRAToUVRow_AVX2; + } + } +#endif #if defined(HAS_BGRATOYROW_MSA) && defined(HAS_BGRATOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToYRow = BGRAToYRow_Any_MSA; BGRAToUVRow = BGRAToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { BGRAToYRow = BGRAToYRow_MSA; + } + if (IS_ALIGNED(width, 32)) { BGRAToUVRow = BGRAToUVRow_MSA; } } #endif -#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - BGRAToYRow = BGRAToYRow_Any_LSX; - BGRAToUVRow = BGRAToUVRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_LSX; - BGRAToUVRow = BGRAToUVRow_LSX; +#if defined(HAS_BGRATOYROW_LASX) && defined(HAS_BGRATOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + BGRAToYRow = BGRAToYRow_Any_LASX; + BGRAToUVRow = BGRAToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + BGRAToYRow = BGRAToYRow_LASX; + BGRAToUVRow = BGRAToUVRow_LASX; } } #endif