RAWToJ444 and ARGBToJ444

- ARGBToJ444 is implemented with a new C row function, ARGBToUVJ444Row_C
- RAWToJ444 is implemented as 2 steps per row: RAWToARGB, then ARGBToYJ and ARGBToUVJ444 (the same row functions ARGBToJ444 uses)

libyuv_test '--gunit_filter=*R*To?444_Opt' --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
(with bit exact off)

Samsung S23
RAWToJ444_Opt (437 ms)
ARGBToJ444_Opt (337 ms)
ARGBToI444_Opt (196 ms)

Skylake Xeon
RAWToJ444_Opt (1699 ms)
ARGBToJ444_Opt (1559 ms)
ARGBToI444_Opt (346 ms)

Bug: 390247964
Change-Id: Id1b1b45a5e4512ab50830aadf62f780fbe631575
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6207845
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2025-01-29 15:03:04 -08:00
parent c4a0c8d34a
commit c1bac9e6a5
9 changed files with 418 additions and 4 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1901
Version: 1902
License: BSD
License File: LICENSE
Shipped: yes

View File

@ -937,6 +937,19 @@ int RAWToJ420(const uint8_t* src_raw,
int width,
int height);
// RGB big endian (rgb in memory) to J444.
// Writes three planes (Y, U, V), each with its own stride.
// A negative height makes the conversion read the source rows bottom-up.
// Returns 0 on success, -1 if any pointer is NULL, width <= 0 or height == 0,
// and 1 if the temporary ARGB row used by the conversion cannot be allocated.
LIBYUV_API
int RAWToJ444(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,

View File

@ -229,6 +229,19 @@ int ARGBToJ422(const uint8_t* src_argb,
int width,
int height);
// Convert ARGB to J444.
// Writes three planes (YJ, UJ, VJ), each with its own stride; U and V get one
// sample per source pixel.
// A negative height makes the conversion read the source rows bottom-up.
// Returns 0 on success, or -1 if any pointer is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int ARGBToJ444(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ARGB to J400. (JPeg full range).
LIBYUV_API
int ARGBToJ400(const uint8_t* src_argb,

View File

@ -2686,6 +2686,11 @@ void ARGBToUV444Row_C(const uint8_t* src_argb,
uint8_t* dst_v,
int width);
// Portable C row function: converts `width` ARGB pixels (4 bytes per pixel)
// into `width` U bytes and `width` V bytes, one pair per pixel, using the
// J (RGBToUJ / RGBToVJ) coefficients.
void ARGBToUVJ444Row_C(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1901
#define LIBYUV_VERSION 1902
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -3521,6 +3521,218 @@ int RAWToJ420(const uint8_t* src_raw,
}
#undef HAS_RAWTOYJROW
// RAW big endian (rgb in memory) to J444.
// Each row is converted in 2 steps through a temporary ARGB row:
// RAWToARGB, then ARGBToUVJ444 for the U and V planes and ARGBToYJ for Y.
// A negative height reads the source rows bottom-up.
// Returns 0 on success, -1 on invalid arguments, and 1 if the temporary
// row cannot be allocated.
LIBYUV_API
int RAWToJ444(const uint8_t* src_raw,
              int src_stride_raw,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_u,
              int dst_stride_u,
              uint8_t* dst_v,
              int dst_stride_v,
              int width,
              int height) {
  int rows_done;
  // Every row function starts as the portable C version. The CPU checks
  // below replace it when a CPU-specific variant is compiled in and the
  // matching CPU flag is set; later checks override earlier ones.
  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int pix) =
      RAWToARGBRow_C;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int pix) =
      ARGBToYJRow_C;
  void (*ARGBToUVJ444Row)(const uint8_t* src_argb, uint8_t* dst_uj,
                          uint8_t* dst_vj, int pix) = ARGBToUVJ444Row_C;

  if (width <= 0 || height == 0 || !src_raw || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Negative height means invert the image: start at the last source row
  // and walk backwards.
  if (height < 0) {
    height = -height;
    src_raw += (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // TODO: add row coalesce when main loop handles large width in blocks

  // Step 1 row function: RAW -> ARGB.
  // Where an _Any_ variant exists it is used unless width is a multiple of
  // the alignment tested for that variant.
#if defined(HAS_RAWTOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToARGBRow =
        IS_ALIGNED(width, 16) ? RAWToARGBRow_SSSE3 : RAWToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_RAWTOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToARGBRow =
        IS_ALIGNED(width, 8) ? RAWToARGBRow_NEON : RAWToARGBRow_Any_NEON;
  }
#endif
#if defined(HAS_RAWTOARGBROW_SVE2)
  if (TestCpuFlag(kCpuHasSVE2)) {
    RAWToARGBRow = RAWToARGBRow_SVE2;
  }
#endif
#if defined(HAS_RAWTOARGBROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToARGBRow =
        IS_ALIGNED(width, 16) ? RAWToARGBRow_MSA : RAWToARGBRow_Any_MSA;
  }
#endif
#if defined(HAS_RAWTOARGBROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    RAWToARGBRow =
        IS_ALIGNED(width, 16) ? RAWToARGBRow_LSX : RAWToARGBRow_Any_LSX;
  }
#endif
#if defined(HAS_RAWTOARGBROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    RAWToARGBRow =
        IS_ALIGNED(width, 32) ? RAWToARGBRow_LASX : RAWToARGBRow_Any_LASX;
  }
#endif
#if defined(HAS_RAWTOARGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    RAWToARGBRow = RAWToARGBRow_RVV;
  }
#endif

  // Step 2 row function for the Y plane: ARGB -> YJ.
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_SSSE3 : ARGBToYJRow_Any_SSSE3;
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 32) ? ARGBToYJRow_AVX2 : ARGBToYJRow_Any_AVX2;
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_NEON : ARGBToYJRow_Any_NEON;
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON_DOTPROD)
  if (TestCpuFlag(kCpuHasNeonDotProd)) {
    ARGBToYJRow = IS_ALIGNED(width, 16) ? ARGBToYJRow_NEON_DotProd
                                        : ARGBToYJRow_Any_NEON_DotProd;
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_MSA : ARGBToYJRow_Any_MSA;
  }
#endif
#if defined(HAS_ARGBTOYJROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_LSX : ARGBToYJRow_Any_LSX;
  }
#endif
#if defined(HAS_ARGBTOYJROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 32) ? ARGBToYJRow_LASX : ARGBToYJRow_Any_LASX;
  }
#endif
#if defined(HAS_ARGBTOYJROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBToYJRow = ARGBToYJRow_RVV;
  }
#endif

  // Step 2 row function for the U and V planes: ARGB -> UVJ444.
  // TODO: implement UVJ444 or trim the ifdef below
#if defined(HAS_ARGBTOUVJ444ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 16) ? ARGBToUVJ444Row_SSSE3
                                            : ARGBToUVJ444Row_Any_SSSE3;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 8) ? ARGBToUVJ444Row_NEON
                                           : ARGBToUVJ444Row_Any_NEON;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_NEON_I8MM)
  if (TestCpuFlag(kCpuHasNeonI8MM)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 8) ? ARGBToUVJ444Row_NEON_I8MM
                                           : ARGBToUVJ444Row_Any_NEON_I8MM;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 16) ? ARGBToUVJ444Row_MSA
                                            : ARGBToUVJ444Row_Any_MSA;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 16) ? ARGBToUVJ444Row_LSX
                                            : ARGBToUVJ444Row_Any_LSX;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 32) ? ARGBToUVJ444Row_LASX
                                            : ARGBToUVJ444Row_Any_LASX;
  }
#endif

  {
    // Allocate one row of ARGB; the byte count is rounded up to a multiple
    // of 32.
    const int row_bytes = (width * 4 + 31) & ~31;
    align_buffer_64(row, row_bytes);
    if (!row) {
      return 1;
    }
    for (rows_done = 0; rows_done < height; ++rows_done) {
      // Expand the RAW row to ARGB once, then derive all three planes
      // from that temporary row.
      RAWToARGBRow(src_raw, row, width);
      ARGBToUVJ444Row(row, dst_u, dst_v, width);
      ARGBToYJRow(row, dst_y, width);
      src_raw += src_stride_raw;
      dst_y += dst_stride_y;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    free_aligned_buffer_64(row);
  }
  return 0;
}
// Convert RGB565 to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,

View File

@ -2405,6 +2405,159 @@ int ARGBToAR30(const uint8_t* src_argb,
return 0;
}
// ARGB little endian (bgra in memory) to J444.
// Every source row produces one YJ row plus one U row and one V row of the
// same width.
// A negative height reads the source rows bottom-up.
// Returns 0 on success, or -1 on invalid arguments.
LIBYUV_API
int ARGBToJ444(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int rows_done;
  // Both row functions start as the portable C version. The CPU checks
  // below replace them when a CPU-specific variant is compiled in and the
  // matching CPU flag is set; later checks override earlier ones.
  void (*ARGBToYJRow)(const uint8_t* src, uint8_t* dst_yj, int pix) =
      ARGBToYJRow_C;
  void (*ARGBToUVJ444Row)(const uint8_t* src, uint8_t* dst_uj,
                          uint8_t* dst_vj, int pix) = ARGBToUVJ444Row_C;

  if (width <= 0 || height == 0 || !src_argb || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Negative height means invert the image: start at the last source row
  // and walk backwards.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: when no plane has padding between rows, treat the whole
  // image as a single row of width * height pixels. This must run before
  // the alignment checks below because it changes width.
  if (src_stride_argb == width * 4 && dst_stride_y == width &&
      dst_stride_u == width && dst_stride_v == width) {
    width *= height;
    height = 1;
    dst_stride_v = 0;
    dst_stride_u = 0;
    dst_stride_y = 0;
    src_stride_argb = 0;
  }

  // Row function for the U and V planes: ARGB -> UVJ444.
  // Where an _Any_ variant exists it is used unless width is a multiple of
  // the alignment tested for that variant.
#if defined(HAS_ARGBTOUVJ444ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 16) ? ARGBToUVJ444Row_SSSE3
                                            : ARGBToUVJ444Row_Any_SSSE3;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 8) ? ARGBToUVJ444Row_NEON
                                           : ARGBToUVJ444Row_Any_NEON;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_NEON_I8MM)
  if (TestCpuFlag(kCpuHasNeonI8MM)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 8) ? ARGBToUVJ444Row_NEON_I8MM
                                           : ARGBToUVJ444Row_Any_NEON_I8MM;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 16) ? ARGBToUVJ444Row_MSA
                                            : ARGBToUVJ444Row_Any_MSA;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 16) ? ARGBToUVJ444Row_LSX
                                            : ARGBToUVJ444Row_Any_LSX;
  }
#endif
#if defined(HAS_ARGBTOUVJ444ROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBToUVJ444Row = IS_ALIGNED(width, 32) ? ARGBToUVJ444Row_LASX
                                            : ARGBToUVJ444Row_Any_LASX;
  }
#endif

  // Row function for the Y plane: ARGB -> YJ.
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_SSSE3 : ARGBToYJRow_Any_SSSE3;
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 32) ? ARGBToYJRow_AVX2 : ARGBToYJRow_Any_AVX2;
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_NEON : ARGBToYJRow_Any_NEON;
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON_DOTPROD)
  if (TestCpuFlag(kCpuHasNeonDotProd)) {
    ARGBToYJRow = IS_ALIGNED(width, 16) ? ARGBToYJRow_NEON_DotProd
                                        : ARGBToYJRow_Any_NEON_DotProd;
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_MSA : ARGBToYJRow_Any_MSA;
  }
#endif
#if defined(HAS_ARGBTOYJROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 16) ? ARGBToYJRow_LSX : ARGBToYJRow_Any_LSX;
  }
#endif
#if defined(HAS_ARGBTOYJROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBToYJRow =
        IS_ALIGNED(width, 32) ? ARGBToYJRow_LASX : ARGBToYJRow_Any_LASX;
  }
#endif
#if defined(HAS_ARGBTOYJROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBToYJRow = ARGBToYJRow_RVV;
  }
#endif

  for (rows_done = 0; rows_done < height; ++rows_done) {
    // Chroma first, then luma, both read directly from the source row.
    ARGBToUVJ444Row(src_argb, dst_u, dst_v, width);
    ARGBToYJRow(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8_t* src_argb,

View File

@ -799,14 +799,13 @@ static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
}
#endif
#if defined(LIBYUV_ARGBTOUV_PAVGB)
// Computes the J-variant U value for one pixel from 8-bit r, g, b.
// The weights (127, -84, -43) sum to zero, so grey input yields 128.
// 0x8080 adds the 128 offset (0x8000) plus 0.5 for rounding (0x80) before
// the fixed-point result is shifted down by 8 bits.
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Computes the J-variant V value for one pixel from 8-bit r, g, b.
// The weights (127, -107, -20) sum to zero, so grey input yields 128.
// 0x8080 adds the 128 offset (0x8000) plus 0.5 for rounding (0x80) before
// the fixed-point result is shifted down by 8 bits.
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
#else
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
// Computes a J-variant U value using the RGBToUJ weights halved by integer
// division (127 / 2 = 63, 84 / 2 = 42, 43 / 2 = 21), with the same 0x8080
// offset-plus-rounding term and 8-bit shift.
// NOTE(review): the "2x" name and the uint16_t arguments suggest the inputs
// are sums of two 8-bit samples - confirm against the callers.
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  const int weighted = (127 / 2) * b - (84 / 2) * g - (43 / 2) * r;
  return (weighted + 0x8080) >> 8;
}
@ -1230,6 +1229,23 @@ void ARGBToUV444Row_C(const uint8_t* src_argb,
}
}
// Converts one row of ARGB pixels to U and V at full chroma resolution:
// each 4-byte source pixel yields exactly one U byte and one V byte.
// The blue, green and red bytes are read from offsets 0, 1 and 2 of each
// pixel; the byte at offset 3 is not used.
void ARGBToUVJ444Row_C(const uint8_t* src_argb,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  int x;
  for (x = 0; x < width; ++x, src_argb += 4, ++dst_u, ++dst_v) {
    // Read all three channels before storing either output byte.
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    *dst_u = RGBToUJ(red, green, blue);
    *dst_v = RGBToVJ(red, green, blue);
  }
}
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {

View File

@ -666,6 +666,7 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
// Covers the ARGBToJ444 conversion.
// NOTE(review): the trailing 1, 1 presumably are the chroma subsample
// factors (none for 444) - confirm against the TESTATOPLANAR definition.
TESTATOPLANAR(ARGB, 4, 1, J444, 1, 1)
TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
@ -678,6 +679,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
// Covers the RAWToJ444 conversion.
// NOTE(review): the 3 presumably is the source bytes per pixel and the
// trailing 1, 1 the chroma subsample factors - confirm against the
// TESTATOPLANAR definition.
TESTATOPLANAR(RAW, 3, 1, J444, 1, 1)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)