mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-06-15 00:16:08 +08:00
I420ToRAW use 2 step AVX512
On Icelake:

Was AVX2
I420ToRAW_Opt (283 ms)
  67.55% I422ToARGBRow_AVX2
  26.46% ARGBToRGB24Row_AVX2

Now AVX512VBMI
I420ToRAW_Opt (238 ms)
  73.08% I422ToARGBRow_AVX512BW
  21.59% ARGBToRGB24Row_AVX512VBMI

Bug: 42280902
Change-Id: I9d4d21faed30c529a5e593819f103be115709f37
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7909924
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
parent
4be798d7c5
commit
3bdb3b94ca
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||
Version: 1947
|
||||
Version: 1948
|
||||
Revision: DEPS
|
||||
License: BSD-3-Clause
|
||||
License File: LICENSE
|
||||
|
||||
@ -398,6 +398,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOUV444MATRIXROW_AVX512BW
|
||||
#define HAS_ARGBTOYROW_AVX512BW
|
||||
#define HAS_ARGBTOYMATRIXROW_AVX512BW
|
||||
#define HAS_I422TORGB24ROW_AVX512VBMI
|
||||
#define HAS_I422TORGB24ROW_AVX512BW
|
||||
#define HAS_ARGBTOUVJ444ROW_AVX512BW
|
||||
#define HAS_ARGBTOUVROW_AVX512BW
|
||||
#define HAS_ARGBTOUVJROW_AVX512BW
|
||||
@ -5148,6 +5150,18 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
// Converts `width` pixels of one row from planar Y/U/V (src_y, src_u, src_v)
// to packed RGB24 in dst_rgb24 (3 bytes written per pixel). The definition
// works in two passes: I422ToARGBRow_AVX512BW into a temporary ARGB buffer,
// then ARGBToRGB24Row_AVX512VBMI. The U and V pointers advance half as fast
// as the Y pointer. The dispatch code shown in this change selects this
// variant directly only when width is a multiple of 32; other widths go
// through I422ToRGB24Row_Any_AVX512VBMI.
void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
// Converts `width` pixels of one row from planar Y/U/V (src_y, src_u, src_v)
// to packed RGB24 in dst_rgb24 (3 bytes written per pixel). The definition
// works in two passes: I422ToARGBRow_AVX512BW into a temporary ARGB buffer,
// then ARGBToRGB24Row_AVX2 for the ARGB -> RGB24 step. The dispatch code
// shown in this change selects this variant when kCpuHasAVX512BW is set and
// width is a multiple of 32; the AVX512VBMI variant, tested afterwards,
// replaces it when that CPU flag is also set.
void I422ToRGB24Row_AVX512BW(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
|
||||
const uint8_t* u_buf,
|
||||
const uint8_t* v_buf,
|
||||
@ -5466,6 +5480,18 @@ void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
// "Any" counterpart of I422ToRGB24Row_AVX512VBMI, instantiated through the
// ANY31C macro. The dispatch code installs it whenever kCpuHasAVX512VBMI is
// set and keeps it as the row function when width is not a multiple of 32.
// NOTE(review): presumably it handles the leftover pixels that the
// fixed-step kernel cannot - confirm against the ANY31C definition.
void I422ToRGB24Row_Any_AVX512VBMI(const uint8_t* y_buf,
|
||||
const uint8_t* u_buf,
|
||||
const uint8_t* v_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
// "Any" counterpart of I422ToRGB24Row_AVX512BW, instantiated through the
// ANY31C macro. The dispatch code installs it whenever kCpuHasAVX512BW is
// set and keeps it as the row function when width is not a multiple of 32
// (unless the later AVX512VBMI check overrides the choice).
// NOTE(review): presumably it handles the leftover pixels that the
// fixed-step kernel cannot - confirm against the ANY31C definition.
void I422ToRGB24Row_Any_AVX512BW(const uint8_t* y_buf,
|
||||
const uint8_t* u_buf,
|
||||
const uint8_t* v_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I400ToARGBRow_C(const uint8_t* src_y,
|
||||
uint8_t* rgb_buf,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1947
|
||||
#define LIBYUV_VERSION 1948
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -5556,6 +5556,22 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGB24ROW_AVX512BW)
|
||||
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512BW;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_AVX512BW;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGB24ROW_AVX512VBMI)
|
||||
if (TestCpuFlag(kCpuHasAVX512VBMI)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512VBMI;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_AVX512VBMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGB24ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
|
||||
@ -5761,6 +5777,22 @@ int I422ToRGB24Matrix(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGB24ROW_AVX512BW)
|
||||
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512BW;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_AVX512BW;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGB24ROW_AVX512VBMI)
|
||||
if (TestCpuFlag(kCpuHasAVX512VBMI)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512VBMI;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_AVX512VBMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TORGB24ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
|
||||
|
||||
@ -387,6 +387,12 @@ ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
|
||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
|
||||
#endif
|
||||
// Instantiates I422ToRGB24Row_Any_AVX512VBMI on top of
// I422ToRGB24Row_AVX512VBMI, using the same ANY31C arguments as the AVX2
// instantiation above.
// NOTE(review): the trailing 31 is presumably the width mask that pairs with
// the IS_ALIGNED(width, 32) test in the dispatch code, and 3 the bytes per
// output pixel - confirm against the ANY31C definition.
#ifdef HAS_I422TORGB24ROW_AVX512VBMI
|
||||
ANY31C(I422ToRGB24Row_Any_AVX512VBMI, I422ToRGB24Row_AVX512VBMI, 1, 0, 3, 31)
|
||||
#endif
|
||||
// Instantiates I422ToRGB24Row_Any_AVX512BW on top of
// I422ToRGB24Row_AVX512BW, using the same ANY31C arguments as the AVX2
// instantiation above.
// NOTE(review): the trailing 31 is presumably the width mask that pairs with
// the IS_ALIGNED(width, 32) test in the dispatch code, and 3 the bytes per
// output pixel - confirm against the ANY31C definition.
#ifdef HAS_I422TORGB24ROW_AVX512BW
|
||||
ANY31C(I422ToRGB24Row_Any_AVX512BW, I422ToRGB24Row_AVX512BW, 1, 0, 3, 31)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
|
||||
@ -4298,6 +4298,50 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
|
||||
void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||
ARGBToRGB24Row_AVX512VBMI(row, dst_rgb24, twidth);
|
||||
src_y += twidth;
|
||||
src_u += twidth / 2;
|
||||
src_v += twidth / 2;
|
||||
dst_rgb24 += twidth * 3;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX2)
|
||||
void I422ToRGB24Row_AVX512BW(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
|
||||
src_y += twidth;
|
||||
src_u += twidth / 2;
|
||||
src_v += twidth / 2;
|
||||
dst_rgb24 += twidth * 3;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I444TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
|
||||
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user