mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-06-15 00:16:08 +08:00
I420ToRAW use 2 step AVX512
On Icelake:
Was AVX2:
  I420ToRAW_Opt (283 ms)
  67.55% I422ToARGBRow_AVX2
  26.46% ARGBToRGB24Row_AVX2
Now AVX512VBMI:
  I420ToRAW_Opt (238 ms)
  73.08% I422ToARGBRow_AVX512BW
  21.59% ARGBToRGB24Row_AVX512VBMI

Bug: 42280902
Change-Id: I9d4d21faed30c529a5e593819f103be115709f37
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7909924
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
parent
4be798d7c5
commit
3bdb3b94ca
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||||
Version: 1947
|
Version: 1948
|
||||||
Revision: DEPS
|
Revision: DEPS
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|||||||
@ -398,6 +398,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBTOUV444MATRIXROW_AVX512BW
|
#define HAS_ARGBTOUV444MATRIXROW_AVX512BW
|
||||||
#define HAS_ARGBTOYROW_AVX512BW
|
#define HAS_ARGBTOYROW_AVX512BW
|
||||||
#define HAS_ARGBTOYMATRIXROW_AVX512BW
|
#define HAS_ARGBTOYMATRIXROW_AVX512BW
|
||||||
|
#define HAS_I422TORGB24ROW_AVX512VBMI
|
||||||
|
#define HAS_I422TORGB24ROW_AVX512BW
|
||||||
#define HAS_ARGBTOUVJ444ROW_AVX512BW
|
#define HAS_ARGBTOUVJ444ROW_AVX512BW
|
||||||
#define HAS_ARGBTOUVROW_AVX512BW
|
#define HAS_ARGBTOUVROW_AVX512BW
|
||||||
#define HAS_ARGBTOUVJROW_AVX512BW
|
#define HAS_ARGBTOUVJROW_AVX512BW
|
||||||
@ -5148,6 +5150,18 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
|||||||
uint8_t* dst_rgb24,
|
uint8_t* dst_rgb24,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
|
void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y,
|
||||||
|
const uint8_t* src_u,
|
||||||
|
const uint8_t* src_v,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
|
void I422ToRGB24Row_AVX512BW(const uint8_t* src_y,
|
||||||
|
const uint8_t* src_u,
|
||||||
|
const uint8_t* src_v,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
|
void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
|
||||||
const uint8_t* u_buf,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* v_buf,
|
const uint8_t* v_buf,
|
||||||
@ -5466,6 +5480,18 @@ void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
|
void I422ToRGB24Row_Any_AVX512VBMI(const uint8_t* y_buf,
|
||||||
|
const uint8_t* u_buf,
|
||||||
|
const uint8_t* v_buf,
|
||||||
|
uint8_t* dst_ptr,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
|
void I422ToRGB24Row_Any_AVX512BW(const uint8_t* y_buf,
|
||||||
|
const uint8_t* u_buf,
|
||||||
|
const uint8_t* v_buf,
|
||||||
|
uint8_t* dst_ptr,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
|
|
||||||
void I400ToARGBRow_C(const uint8_t* src_y,
|
void I400ToARGBRow_C(const uint8_t* src_y,
|
||||||
uint8_t* rgb_buf,
|
uint8_t* rgb_buf,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1947
|
#define LIBYUV_VERSION 1948
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -5556,6 +5556,22 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_I422TORGB24ROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512BW;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_AVX512BW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_I422TORGB24ROW_AVX512VBMI)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512VBMI)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512VBMI;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_AVX512VBMI;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_I422TORGB24ROW_NEON)
|
#if defined(HAS_I422TORGB24ROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
|
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
|
||||||
@ -5761,6 +5777,22 @@ int I422ToRGB24Matrix(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_I422TORGB24ROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512BW;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_AVX512BW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_I422TORGB24ROW_AVX512VBMI)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512VBMI)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_Any_AVX512VBMI;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
I422ToRGB24Row = I422ToRGB24Row_AVX512VBMI;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_I422TORGB24ROW_NEON)
|
#if defined(HAS_I422TORGB24ROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
|
I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
|
||||||
|
|||||||
@ -387,6 +387,12 @@ ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
|
|||||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||||
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
|
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAS_I422TORGB24ROW_AVX512VBMI
|
||||||
|
ANY31C(I422ToRGB24Row_Any_AVX512VBMI, I422ToRGB24Row_AVX512VBMI, 1, 0, 3, 31)
|
||||||
|
#endif
|
||||||
|
#ifdef HAS_I422TORGB24ROW_AVX512BW
|
||||||
|
ANY31C(I422ToRGB24Row_Any_AVX512BW, I422ToRGB24Row_AVX512BW, 1, 0, 3, 31)
|
||||||
|
#endif
|
||||||
#ifdef HAS_I422TOARGBROW_AVX2
|
#ifdef HAS_I422TOARGBROW_AVX2
|
||||||
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -4298,6 +4298,50 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
|
||||||
|
void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y,
|
||||||
|
const uint8_t* src_u,
|
||||||
|
const uint8_t* src_v,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width) {
|
||||||
|
// Row buffer for intermediate ARGB pixels.
|
||||||
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
|
while (width > 0) {
|
||||||
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
|
I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||||
|
ARGBToRGB24Row_AVX512VBMI(row, dst_rgb24, twidth);
|
||||||
|
src_y += twidth;
|
||||||
|
src_u += twidth / 2;
|
||||||
|
src_v += twidth / 2;
|
||||||
|
dst_rgb24 += twidth * 3;
|
||||||
|
width -= twidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX2)
|
||||||
|
void I422ToRGB24Row_AVX512BW(const uint8_t* src_y,
|
||||||
|
const uint8_t* src_u,
|
||||||
|
const uint8_t* src_v,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width) {
|
||||||
|
// Row buffer for intermediate ARGB pixels.
|
||||||
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
|
while (width > 0) {
|
||||||
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
|
I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||||
|
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
|
||||||
|
src_y += twidth;
|
||||||
|
src_u += twidth / 2;
|
||||||
|
src_v += twidth / 2;
|
||||||
|
dst_rgb24 += twidth * 3;
|
||||||
|
width -= twidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_I444TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
|
#if defined(HAS_I444TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
|
||||||
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
|
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||||
const uint8_t* src_u,
|
const uint8_t* src_u,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user