Add RAWToI444

Skylake Xeon
  RAWToI444_Opt (433 ms)
  RAWToJ444_Opt (1781 ms)
  ARGBToI444_Opt (352 ms)
  ARGBToJ444_Opt (1577 ms)

Samsung S22 Exynos
  ARGBToI444_Opt (283 ms)
  ARGBToJ444_Opt (209 ms)
  RAWToI444_Opt (294 ms)
  RAWToJ444_Opt (293 ms)

Profiling on Samsung S22 Exynos
37.62%,  ARGBToUV444Row_NEON_I8MM
29.42%,  RAWToARGBRow_SVE2
19.61%,  ARGBToYRow_NEON_DotProd

By passing different values of --libyuv_cpu_info=N, we can compare each ISA:
C           1  RAWToI444_Opt (781 ms)
NEON      511  RAWToI444_Opt (757 ms)
NEONDOT  1023  RAWToI444_Opt (571 ms)
NEONI8MM 2047  RAWToI444_Opt (334 ms)
SVE2     8191  RAWToI444_Opt (307 ms)



Bug: 390247964
Change-Id: I0316fedd32222588455afa751f5b854f46bce024
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6223658
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2025-02-03 14:44:55 -08:00
parent b3fd3f3f3b
commit 5a9a6ea936
6 changed files with 229 additions and 4 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/ URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1902 Version: 1903
License: BSD License: BSD
License File: LICENSE License File: LICENSE
Shipped: yes Shipped: yes

View File

@ -924,6 +924,19 @@ int RAWToI420(const uint8_t* src_raw,
int width, int width,
int height); int height);
// RGB big endian (rgb in memory) to I444.
// Writes one Y, one U and one V row to the destination planes for every
// source row.
// Returns 0 on success, -1 if any pointer is null, width <= 0 or height == 0,
// and 1 if the temporary ARGB row cannot be allocated.
// A negative height walks the source from its last row upward (vertical flip).
LIBYUV_API
int RAWToI444(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB big endian (rgb in memory) to J420. // RGB big endian (rgb in memory) to J420.
LIBYUV_API LIBYUV_API
int RAWToJ420(const uint8_t* src_raw, int RAWToJ420(const uint8_t* src_raw,

View File

@ -398,7 +398,6 @@ extern "C" {
#define HAS_ARGBTOUVJ444ROW_NEON #define HAS_ARGBTOUVJ444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOUVROW_NEON
// TODO: Fix ARGBTOYROW and test ARGBToI444 tests pass.
#define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON #define HAS_ARGBTOYROW_NEON
#define HAS_AYUVTOUVROW_NEON #define HAS_AYUVTOUVROW_NEON

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1902 #define LIBYUV_VERSION 1903
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -3521,6 +3521,218 @@ int RAWToJ420(const uint8_t* src_raw,
} }
#undef HAS_RAWTOYJROW #undef HAS_RAWTOYJROW
// RAW big endian (rgb in memory) to I444
// 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444
LIBYUV_API
int RAWToI444(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8_t* src_raw, uint8_t* dst_u, uint8_t* dst_v,
int width) = ARGBToUV444Row_C;
if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
// TODO: add row coalesce when main loop handles large width in blocks
// TODO: implement UV444 or trim the ifdef below
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_NEON_I8MM)
if (TestCpuFlag(kCpuHasNeonI8MM)) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON_I8MM;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_NEON_I8MM;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUV444Row = ARGBToUV444Row_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_MSA;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToUV444Row = ARGBToUV444Row_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_LSX;
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToUV444Row = ARGBToUV444Row_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToUV444Row = ARGBToUV444Row_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON_DOTPROD)
if (TestCpuFlag(kCpuHasNeonDotProd)) {
ARGBToYRow = ARGBToYRow_Any_NEON_DotProd;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON_DotProd;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_MSA;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYRow = ARGBToYRow_RVV;
}
#endif
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_SVE2)
if (TestCpuFlag(kCpuHasSVE2)) {
RAWToARGBRow = RAWToARGBRow_SVE2;
}
#endif
#if defined(HAS_RAWTOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RAWToARGBRow = RAWToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_MSA;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToARGBRow = RAWToARGBRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToARGBRow = RAWToARGBRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToARGBRow = RAWToARGBRow_LASX;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RAWToARGBRow = RAWToARGBRow_RVV;
}
#endif
{
// Allocate a row of ARGB.
const int row_size = width * 4;
align_buffer_64(row, row_size);
if (!row)
return 1;
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, row, width);
ARGBToUV444Row(row, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
src_raw += src_stride_raw;
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
free_aligned_buffer_64(row);
}
return 0;
}
// RAW big endian (rgb in memory) to J444 // RAW big endian (rgb in memory) to J444
// 2 step conversion of RAWToARGB then ARGBToYJ and ARGBToUVJ444 // 2 step conversion of RAWToARGB then ARGBToYJ and ARGBToUVJ444
LIBYUV_API LIBYUV_API
@ -3714,7 +3926,7 @@ int RAWToJ444(const uint8_t* src_raw,
{ {
// Allocate a row of ARGB. // Allocate a row of ARGB.
const int row_size = (width * 4 + 31) & ~31; const int row_size = width * 4;
align_buffer_64(row, row_size); align_buffer_64(row, row_size);
if (!row) if (!row)
return 1; return 1;

View File

@ -678,6 +678,7 @@ TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
TESTATOPLANAR(I400, 1, 1, I420, 2, 2) TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2) TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I444, 1, 1)
TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, J444, 1, 1) TESTATOPLANAR(RAW, 3, 1, J444, 1, 1)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)