Add RGBToNV21Matrix function

- Implement RAW, RGB24, NV21 and JNV21 wrappers that call it.

Zen5
Was [       OK ] LibYUVConvertTest.RAWToJNV21_Opt (1146 ms)
Now [       OK ] LibYUVConvertTest.RAWToJNV21_Opt (1446 ms)
Reason: the new code uses 1 pass for RAWToY but 2 passes (RAWToARGB, then ARGBToUV) for chroma. A 1-pass RGBToUV row function is needed.

Bug: libyuv:42280902
Change-Id: Ife6fbed0829484045409e6d42b85cec1d1fd6052
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7780026
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
This commit is contained in:
Frank Barchard 2026-04-20 17:22:17 -07:00 committed by libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com
parent 9f13b2814d
commit 81f698829b
11 changed files with 514 additions and 1018 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1929
Version: 1930
Revision: DEPS
License: BSD-3-Clause
License File: LICENSE

View File

@ -456,6 +456,29 @@ int ARGBToUYVY(const uint8_t* src_argb,
int width,
int height);
// RAW to NV21 with matrix.
LIBYUV_API
int RAWToNV21Matrix(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
const struct ArgbConstants* argbconstants,
int width,
int height);
// RAW to NV21.
LIBYUV_API
int RAWToNV21(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// RAW to JNV21 full range NV21
LIBYUV_API
int RAWToJNV21(const uint8_t* src_raw,

View File

@ -2191,6 +2191,10 @@ void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBToYMatrixRow_C(const uint8_t* src_rgb24,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c);
void ARGBToYMatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
@ -2199,6 +2203,10 @@ void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c);
void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c);
void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -2223,6 +2231,14 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void RGBToUVMatrixRow_C(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUV444MatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1929
#define LIBYUV_VERSION 1930
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -11,6 +11,7 @@
#include "libyuv/convert.h"
#include "libyuv/basic_types.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
@ -2205,7 +2206,14 @@ int ARGBToI420Matrix(const uint8_t* src_argb,
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
// TODO(fbarchard): add AVX512BW
#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
@ -2923,12 +2931,89 @@ int RGBAToI420(const uint8_t* src_rgba,
return 0;
}
// Enabled if 1 pass is available
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_LSX) || \
defined(HAS_RGB24TOYROW_AVX2) || \
defined(HAS_RGB24TOYROW_RVV))
#define HAS_RGB24TOYROW
// Convert RGB to I420 with matrix.
// Generic 3-bytes-per-pixel to I420 converter, parameterized by a YUV
// coefficient struct so a single implementation can serve the limited-range
// (I601) and full-range (JPEG) matrices, as well as byte-swapped sources
// by passing swapped constants and/or swapped U/V destinations.
//
// src_rgb24 / src_stride_rgb24: source pixels, 3 bytes per pixel.
// dst_y / dst_u / dst_v (+ strides): destination I420 planes; one chroma
//   row is written per two luma rows (subsampled vertically; presumably
//   horizontally as well per I420 — subsampling width is internal to the
//   UV row function).
// argbconstants: conversion coefficients (must be non-NULL).
// width must be > 0; a negative height inverts the image vertically.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int RGBToI420Matrix(const uint8_t* src_rgb24,
                    int src_stride_rgb24,
                    uint8_t* dst_y,
                    int dst_stride_y,
                    uint8_t* dst_u,
                    int dst_stride_u,
                    uint8_t* dst_v,
                    int dst_stride_v,
                    const struct ArgbConstants* argbconstants,
                    int width,
                    int height) {
  int y;
  // Row functions default to the portable C versions; faster variants are
  // selected below based on compile-time availability and runtime CPU flags.
  void (*RGBToUVMatrixRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
                           uint8_t* dst_u, uint8_t* dst_v, int width,
                           const struct ArgbConstants* c) = RGBToUVMatrixRow_C;
  void (*RGBToYMatrixRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width,
                          const struct ArgbConstants* c) = RGBToYMatrixRow_C;
  if (!src_rgb24 || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
    src_stride_rgb24 = -src_stride_rgb24;
  }
  // NOTE(review): only the Y row has SIMD specializations selected here;
  // RGBToUVMatrixRow always runs the C path (see commit message: a 1-pass
  // SIMD RGBToUV row is still needed).
#if defined(HAS_RGBTOYMATRIXROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    RGBToYMatrixRow = RGBToYMatrixRow_RVV;
  }
#endif
#if defined(HAS_RGBTOYMATRIXROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    // _Any_ variant handles arbitrary widths; exact variant requires
    // width to be a multiple of the vector step.
    RGBToYMatrixRow = RGBToYMatrixRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      RGBToYMatrixRow = RGBToYMatrixRow_AVX2;
    }
  }
#endif
#if defined(HAS_RGBTOYMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RGBToYMatrixRow = RGBToYMatrixRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      RGBToYMatrixRow = RGBToYMatrixRow_NEON;
    }
  }
#endif
#if defined(HAS_RGBTOYMATRIXROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    RGBToYMatrixRow = RGBToYMatrixRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      RGBToYMatrixRow = RGBToYMatrixRow_LSX;
    }
  }
#endif
#if defined(HAS_RGBTOYMATRIXROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    RGBToYMatrixRow = RGBToYMatrixRow_Any_LASX;
    if (IS_ALIGNED(width, 32)) {
      RGBToYMatrixRow = RGBToYMatrixRow_LASX;
    }
  }
#endif
  // Process two rows per iteration: one UV row (sampled from both source
  // rows via src_stride_rgb24) and two Y rows.
  for (y = 0; y < height - 1; y += 2) {
    RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width, argbconstants);
    RGBToYMatrixRow(src_rgb24, dst_y, width, argbconstants);
    RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width, argbconstants);
    src_rgb24 += src_stride_rgb24 * 2;
    dst_y += dst_stride_y * 2;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  // Odd height: stride 0 makes the UV row read the final source row twice.
  if (height & 1) {
    RGBToUVMatrixRow(src_rgb24, 0, dst_u, dst_v, width, argbconstants);
    RGBToYMatrixRow(src_rgb24, dst_y, width, argbconstants);
  }
  return 0;
}
// Convert RGB24 to I420.
LIBYUV_API
@ -2942,190 +3027,10 @@ int RGB24ToI420(const uint8_t* src_rgb24,
int dst_stride_v,
int width,
int height) {
int y;
#if defined(HAS_RGB24TOYROW)
void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVRow_C;
void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
RGB24ToYRow_C;
#else
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
#if defined(HAS_RGB24TOYROW)
#if defined(HAS_RGB24TOYROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// TODO(fbarchard): Write an AVX2 function for RGB24ToUVRow.
RGB24ToYRow = RGB24ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RGB24ToYRow = RGB24ToYRow_AVX2;
}
}
#endif
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
RGB24ToYRow = RGB24ToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RGB24ToYRow = RGB24ToYRow_NEON;
RGB24ToUVRow = RGB24ToUVRow_NEON;
}
}
#endif
#if defined(HAS_RGB24TOYROW_LSX) && defined(HAS_RGB24TOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGB24ToUVRow = RGB24ToUVRow_Any_LSX;
RGB24ToYRow = RGB24ToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGB24ToYRow = RGB24ToYRow_LSX;
RGB24ToUVRow = RGB24ToUVRow_LSX;
}
}
#endif
#if defined(HAS_RGB24TOYROW_LASX) && defined(HAS_RGB24TOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGB24ToUVRow = RGB24ToUVRow_Any_LASX;
RGB24ToYRow = RGB24ToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGB24ToYRow = RGB24ToYRow_LASX;
RGB24ToUVRow = RGB24ToUVRow_LASX;
}
}
#endif
#if defined(HAS_RGB24TOYROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RGB24ToYRow = RGB24ToYRow_RVV;
}
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYROW
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVRow = ARGBToUVRow_AVX512BW;
}
}
#endif
#endif // HAS_RGB24TOYROW
{
#if !defined(HAS_RGB24TOYROW)
// Allocate 2 rows of ARGB.
const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
if (!row)
return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RGB24TOYROW)
RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYRow(src_rgb24, dst_y, width);
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if defined(HAS_RGB24TOYROW)
RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYRow(src_rgb24, dst_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
#endif
}
#if !defined(HAS_RGB24TOYROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y,
dst_u, dst_stride_u, dst_v, dst_stride_v,
&kArgbI601Constants, width, height);
}
#undef HAS_RGB24TOYROW
// Enabled if 1 pass is available
#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_AVX2) || defined(HAS_RGB24TOYJROW_RVV)
#define HAS_RGB24TOYJROW
#endif
// Convert RGB24 to J420.
LIBYUV_API
@ -3139,179 +3044,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
int dst_stride_v,
int width,
int height) {
int y;
#if defined(HAS_RGB24TOYJROW)
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVJRow_C;
void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
RGB24ToYJRow_C;
#else
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
#if defined(HAS_RGB24TOYJROW)
#if defined(HAS_RGB24TOYJROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RGB24ToYJRow = RGB24ToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RGB24ToYJRow = RGB24ToYJRow_AVX2;
}
}
#endif
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON;
RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_NEON;
RGB24ToUVJRow = RGB24ToUVJRow_NEON;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_LSX;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RGB24ToYJRow = RGB24ToYJRow_LASX;
}
}
#endif
#if defined(HAS_RGB24TOYJROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RGB24ToYJRow = RGB24ToYJRow_RVV;
}
#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYJROW
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYJRow = ARGBToYJRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
#endif // HAS_RGB24TOYJROW
{
#if !defined(HAS_RGB24TOYJROW)
// Allocate 2 rows of ARGB.
const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
if (!row)
return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RGB24TOYJROW)
RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if defined(HAS_RGB24TOYJROW)
RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
ARGBToUVJRow(row, 0, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !defined(HAS_RGB24TOYJROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y,
dst_u, dst_stride_u, dst_v, dst_stride_v,
&kArgbJPEGConstants, width, height);
}
#undef HAS_RGB24TOYJROW
// Enabled if 1 pass is available
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_LSX) || \
defined(HAS_RAWTOYROW_AVX2) || \
defined(HAS_RAWTOYROW_RVV))
#define HAS_RAWTOYROW
#endif
// Convert RAW to I420.
LIBYUV_API
@ -3325,197 +3061,10 @@ int RAWToI420(const uint8_t* src_raw,
int dst_stride_v,
int width,
int height) {
int y;
#if defined(HAS_RAWTOYROW)
void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
uint8_t* dst_v, int width) = RAWToUVRow_C;
void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYRow_C;
#else
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
#if defined(HAS_RAWTOYROW)
#if defined(HAS_RAWTOYROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// TODO(fbarchard): Write an AVX2 function for RAWToUVRow.
RAWToYRow = RAWToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RAWToYRow = RAWToYRow_AVX2;
}
}
#endif
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToUVRow = RAWToUVRow_Any_NEON;
RAWToYRow = RAWToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RAWToYRow = RAWToYRow_NEON;
RAWToUVRow = RAWToUVRow_NEON;
}
}
#endif
#if defined(HAS_RAWTOYROW_LSX) && defined(HAS_RAWTOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToUVRow = RAWToUVRow_Any_LSX;
RAWToYRow = RAWToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYRow = RAWToYRow_LSX;
RAWToUVRow = RAWToUVRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYROW_LASX) && defined(HAS_RAWTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToUVRow = RAWToUVRow_Any_LASX;
RAWToYRow = RAWToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYRow = RAWToYRow_LASX;
RAWToUVRow = RAWToUVRow_LASX;
}
}
#endif
#if defined(HAS_RAWTOYROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RAWToYRow = RAWToYRow_RVV;
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYROW
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RAWToARGBRow = RAWToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RAWToARGBRow = RAWToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVRow = ARGBToUVRow_AVX512BW;
}
}
#endif
#endif // HAS_RAWTOYROW
{
#if !defined(HAS_RAWTOYROW)
// Allocate 2 rows of ARGB.
const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
if (!row)
return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYROW)
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if defined(HAS_RAWTOYROW)
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
#endif
}
#if !defined(HAS_RAWTOYROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y,
dst_v, dst_stride_v, dst_u, dst_stride_u,
&kAbgrI601Constants, width, height);
}
#undef HAS_RAWTOYROW
// Enabled if 1 pass is available
#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_AVX2) || defined(HAS_RAWTOYJROW_RVV)
#define HAS_RAWTOYJROW
#endif
// Convert RAW to J420.
LIBYUV_API
@ -3529,176 +3078,10 @@ int RAWToJ420(const uint8_t* src_raw,
int dst_stride_v,
int width,
int height) {
int y;
#if defined(HAS_RAWTOYJROW)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
#else
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
#if defined(HAS_RAWTOYJROW)
#if defined(HAS_RAWTOYJROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RAWToYJRow = RAWToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_AVX2;
}
}
#endif
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToUVJRow = RAWToUVJRow_Any_NEON;
RAWToYJRow = RAWToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_NEON;
RAWToUVJRow = RAWToUVJRow_NEON;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToUVJRow = RAWToUVJRow_Any_LSX;
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
RAWToUVJRow = RAWToUVJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX) && defined(HAS_RAWTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToUVJRow = RAWToUVJRow_Any_LASX;
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
RAWToUVJRow = RAWToUVJRow_LASX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RAWToYJRow = RAWToYJRow_RVV;
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RAWToARGBRow = RAWToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RAWToARGBRow = RAWToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
#endif // HAS_RAWTOYJROW
{
#if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB.
const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, row_size * 2);
if (!row)
return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, 0, dst_u, dst_v, width);
RAWToYJRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVJRow(row, 0, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !defined(HAS_RAWTOYJROW)
free_aligned_buffer_64(row);
#endif
}
return 0;
return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y,
dst_v, dst_stride_v, dst_u, dst_stride_u,
&kAbgrJPEGConstants, width, height);
}
#undef HAS_RAWTOYJROW
// RAW big endian (rgb in memory) to I444
// 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444

View File

@ -248,6 +248,22 @@ int ARGBToI444Matrix(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOUV444MATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_NEON;
@ -511,6 +527,22 @@ int ARGBToI422Matrix(const uint8_t* src_argb,
ARGBToYMatrixRow = ARGBToYMatrixRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_LASX;
}
}
#endif
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
height == 0) {
@ -779,43 +811,9 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUVMatrixRow_C;
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
// TODO(fbarchard): add AVX512BW
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_NEON;
}
}
#endif
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 ||
height == 0) {
return -1;
@ -826,6 +824,90 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 32)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow = MergeUVRow_Any_SSE2;
@ -877,29 +959,31 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
}
#endif
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
if (!row_u)
return 1;
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
if (!row_u)
return 1;
for (y = 0; y < height - 1; y += 2) {
ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width,
argbconstants);
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width,
argbconstants);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_uv += dst_stride_uv;
for (y = 0; y < height - 1; y += 2) {
ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width,
argbconstants);
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width,
argbconstants);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_uv += dst_stride_uv;
}
if (height & 1) {
ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants);
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
}
free_aligned_buffer_64(row_u);
}
if (height & 1) {
ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants);
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
}
free_aligned_buffer_64(row_u);
return 0;
}
@ -915,13 +999,17 @@ int ARGBToNV21(const uint8_t* src_argb,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C;
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
const struct ArgbConstants* c) = ARGBToYMatrixRow_C;
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUVMatrixRow_C;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_vu, int width) = MergeUVRow_C;
uint8_t* dst_vu, int width) = MergeUVRow_C;
const struct ArgbConstants* argbconstants = &kArgbI601Constants;
if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
}
@ -931,140 +1019,90 @@ int ARGBToNV21(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYRow = ARGBToYRow_Any_AVX2;
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToYRow = ARGBToYRow_Any_AVX512BW;
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToYRow = ARGBToYRow_AVX512BW;
ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_AVX512BW)
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVRow = ARGBToUVRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVRow = ARGBToUVRow_AVX512BW;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 32)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
#if defined(HAS_ARGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON_DOTPROD)
if (TestCpuFlag(kCpuHasNeonDotProd)) {
ARGBToYRow = ARGBToYRow_Any_NEON_DotProd;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_NEON_DotProd;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVRow = ARGBToUVRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_NEON_I8MM)
if (TestCpuFlag(kCpuHasNeonI8MM)) {
ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON_I8MM;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SVE2)
if (TestCpuFlag(kCpuHasSVE2)) {
ARGBToUVRow = ARGBToUVRow_Any_SVE2;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SVE2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_SME)
if (TestCpuFlag(kCpuHasSME)) {
ARGBToUVRow = ARGBToUVRow_Any_SME;
if (IS_ALIGNED(width, 2)) {
ARGBToUVRow = ARGBToUVRow_SME;
ARGBToYMatrixRow = ARGBToYMatrixRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
ARGBToYMatrixRow = ARGBToYMatrixRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYRow = ARGBToYRow_Any_LSX;
ARGBToUVRow = ARGBToUVRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_LSX;
ARGBToUVRow = ARGBToUVRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
ARGBToUVRow = ARGBToUVRow_Any_LASX;
ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_LASX;
ARGBToUVRow = ARGBToUVRow_LASX;
ARGBToYMatrixRow = ARGBToYMatrixRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTOYROW_RVV)
#if defined(HAS_ARGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToYRow = ARGBToYRow_RVV;
ARGBToYMatrixRow = ARGBToYMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow = MergeUVRow_Any_SSE2;
@ -1084,7 +1122,7 @@ int ARGBToNV21(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
MergeUVRow = MergeUVRow_Any_AVX512BW;
if (IS_ALIGNED(halfwidth, 64)) {
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow = MergeUVRow_AVX512BW;
}
}
@ -1115,6 +1153,7 @@ int ARGBToNV21(const uint8_t* src_argb,
MergeUVRow = MergeUVRow_RVV;
}
#endif
{
// Allocate rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
@ -1123,25 +1162,25 @@ int ARGBToNV21(const uint8_t* src_argb,
return 1;
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width,
argbconstants);
MergeUVRow(row_v, row_u, dst_vu, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width,
argbconstants);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_vu += dst_stride_vu;
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants);
MergeUVRow(row_v, row_u, dst_vu, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
}
free_aligned_buffer_64(row_u);
}
return 0;
}
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
@ -3983,36 +4022,32 @@ int ARGBToAB64(const uint8_t* src_argb,
#define HAS_RAWTOYJROW
#endif
// RAW to JNV21 full range NV21
// RAW to NV21 with matrix.
LIBYUV_API
int RAWToJNV21(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
int RAWToNV21Matrix(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
const struct ArgbConstants* argbconstants,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
#if defined(HAS_RAWTOYJROW)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
#else
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
void (*MergeUVRow)(const uint8_t* src_uj, const uint8_t* src_vj,
uint8_t* dst_vu, int width) = MergeUVRow_C;
if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) {
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUVMatrixRow_C;
void (*RGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
const struct ArgbConstants* c) = NULL;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_raw || !dst_y || !dst_vu || !argbconstants || width <= 0 ||
height == 0) {
return -1;
}
// Negative height means invert the image.
@ -4022,54 +4057,6 @@ int RAWToJNV21(const uint8_t* src_raw,
src_stride_raw = -src_stride_raw;
}
#if defined(HAS_RAWTOYJROW)
#if defined(HAS_RAWTOYJROW_AVX2) && defined(HAS_RGBTOYMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// TODO(fbarchard): Write an AVX2 function for RAWToUVJRow.
RAWToYJRow = RAWToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_AVX2;
}
}
#endif
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToUVJRow = RAWToUVJRow_Any_NEON;
RAWToYJRow = RAWToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_NEON;
RAWToUVJRow = RAWToUVJRow_NEON;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
RAWToYJRow = RAWToYJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
RAWToYJRow = RAWToYJRow_LSX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
RAWToYJRow = RAWToYJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
RAWToYJRow = RAWToYJRow_LASX;
}
}
#endif
#if defined(HAS_RAWTOYJROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RAWToYJRow = RAWToYJRow_RVV;
}
#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@ -4086,47 +4073,57 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
#if defined(HAS_ARGBTOUVMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_RVV;
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_AVX512BW)
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX512BW;
if (IS_ALIGNED(width, 64)) {
ARGBToUVJRow = ARGBToUVJRow_AVX512BW;
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW;
if (IS_ALIGNED(width, 32)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW;
}
}
#endif
#endif // HAS_RAWTOYJROW
#if defined(HAS_RGBTOYMATRIXROW_RVV)
if (TestCpuFlag(kCpuHasRVV)) {
RGBToYMatrixRow = RGBToYMatrixRow_RVV;
}
#endif
#if defined(HAS_RGBTOYMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
RGBToYMatrixRow = RGBToYMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
RGBToYMatrixRow = RGBToYMatrixRow_AVX2;
}
}
#endif
#if defined(HAS_RGBTOYMATRIXROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGBToYMatrixRow = RGBToYMatrixRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RGBToYMatrixRow = RGBToYMatrixRow_NEON;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow = MergeUVRow_Any_SSE2;
@ -4146,7 +4143,7 @@ int RAWToJNV21(const uint8_t* src_raw,
#if defined(HAS_MERGEUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) {
MergeUVRow = MergeUVRow_Any_AVX512BW;
if (IS_ALIGNED(halfwidth, 64)) {
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow = MergeUVRow_AVX512BW;
}
}
@ -4177,58 +4174,90 @@ int RAWToJNV21(const uint8_t* src_raw,
MergeUVRow = MergeUVRow_RVV;
}
#endif
{
#if defined(HAS_RAWTOYJROW)
// Allocate a row of uv.
const int row_uv_size = ((halfwidth + 31) & ~31);
align_buffer_64(row_uj, row_uv_size * 2);
uint8_t* row_vj = row_uj + row_uv_size;
#else
// Allocate row of uv and 2 rows of ARGB.
const struct ArgbConstants* uvconstants = argbconstants;
if (argbconstants == &kAbgrI601Constants) {
uvconstants = &kArgbI601Constants;
} else if (argbconstants == &kAbgrJPEGConstants) {
uvconstants = &kArgbJPEGConstants;
}
// Allocate rows of uv and 2 rows of ARGB.
const int row_size = ((width * 4 + 31) & ~31);
const int row_uv_size = ((halfwidth + 31) & ~31);
align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2);
uint8_t* row_vj = row_uj + row_uv_size;
uint8_t* row = row_vj + row_uv_size;
#endif
if (!row_uj)
align_buffer_64(row_u, row_uv_size * 2 + row_size * 2);
uint8_t* row_v = row_u + row_uv_size;
uint8_t* row = row_v + row_uv_size;
if (!row_u)
return 1;
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width);
MergeUVRow(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
ARGBToUVJRow(row, row_size, row_uj, row_vj, width);
MergeUVRow(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
ARGBToUVMatrixRow(row, row_size, row_u, row_v, width, uvconstants);
MergeUVRow(row_v, row_u, dst_vu, halfwidth);
if (RGBToYMatrixRow) {
RGBToYMatrixRow(src_raw, dst_y, width, argbconstants);
RGBToYMatrixRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width,
argbconstants);
} else {
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y,
int width, const struct ArgbConstants* c) =
ARGBToYMatrixRow_C;
ARGBToYMatrixRow(row, dst_y, width, uvconstants);
ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width,
uvconstants);
}
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_vu += dst_stride_vu;
}
if (height & 1) {
#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, 0, row_uj, row_vj, width);
MergeUVRow(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVJRow(row, 0, row_uj, row_vj, width);
MergeUVRow(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
#endif
ARGBToUVMatrixRow(row, 0, row_u, row_v, width, uvconstants);
MergeUVRow(row_v, row_u, dst_vu, halfwidth);
if (RGBToYMatrixRow) {
RGBToYMatrixRow(src_raw, dst_y, width, argbconstants);
} else {
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y,
int width, const struct ArgbConstants* c) =
ARGBToYMatrixRow_C;
ARGBToYMatrixRow(row, dst_y, width, uvconstants);
}
}
free_aligned_buffer_64(row_uj);
free_aligned_buffer_64(row_u);
}
return 0;
}
#undef HAS_RAWTOYJROW
// RAW to NV21.
LIBYUV_API
int RAWToNV21(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
return RAWToNV21Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_vu,
dst_stride_vu, &kAbgrI601Constants, width, height);
}
// RAW to JNV21 full range NV21
LIBYUV_API
int RAWToJNV21(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
return RAWToNV21Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_vu,
dst_stride_vu, &kAbgrJPEGConstants, width, height);
}
#ifdef __cplusplus
} // extern "C"

View File

@ -2321,7 +2321,7 @@ ANY12M(ARGBToUV444MatrixRow_Any_NEON, ARGBToUV444MatrixRow_NEON, 4, 7)
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11MC(ARGBToYMatrixRow_Any_SSSE3, ARGBToYMatrixRow_SSSE3, 4, 15)
#endif
#ifdef HAS_ARGBTOYMATRIXROW_AVX2
#ifdef HAS_ARGBTOYROW_AVX2
ANY11MC(ARGBToYMatrixRow_Any_AVX2, ARGBToYMatrixRow_AVX2, 4, 31)
ANY11MC(RGBToYMatrixRow_Any_AVX2, RGBToYMatrixRow_AVX2, 3, 31)
#endif

View File

@ -782,6 +782,18 @@ static __inline uint8_t RGBToVMatrix(uint8_t r,
8;
}
// Convert one row of 3-byte pixels to luma with the supplied coefficient
// matrix. Bytes are read as [0]=B, [1]=G, [2]=R per pixel (matching the
// argument order RGBToYMatrix(r, g, b, c) expects).
void RGBToYMatrixRow_C(const uint8_t* src_rgb24,
                       uint8_t* dst_y,
                       int width,
                       const struct ArgbConstants* c) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t* pixel = src_rgb24 + x * 3;
    dst_y[x] = RGBToYMatrix(pixel[2], pixel[1], pixel[0], c);
  }
}
void ARGBToYMatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
@ -825,6 +837,38 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
}
}
// Subsampled chroma for 3-byte pixels: each output U/V value is computed
// from the rounded average of a 2x2 block spanning two adjacent rows
// (src_rgb24 and src_rgb24 + src_stride_rgb24). An odd trailing column
// averages only the vertical pair.
void RGBToUVMatrixRow_C(const uint8_t* src_rgb24,
                        int src_stride_rgb24,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width,
                        const struct ArgbConstants* c) {
  const uint8_t* row0 = src_rgb24;
  const uint8_t* row1 = src_rgb24 + src_stride_rgb24;
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    // Round-to-nearest average of the four pixels per channel.
    uint8_t b = (row0[0] + row0[3] + row1[0] + row1[3] + 2) >> 2;
    uint8_t g = (row0[1] + row0[4] + row1[1] + row1[4] + 2) >> 2;
    uint8_t r = (row0[2] + row0[5] + row1[2] + row1[5] + 2) >> 2;
    *dst_u++ = RGBToUMatrix(r, g, b, c);
    *dst_v++ = RGBToVMatrix(r, g, b, c);
    row0 += 6;
    row1 += 6;
  }
  if (width & 1) {
    // Last column of an odd-width row: vertical average only.
    uint8_t b = (row0[0] + row1[0] + 1) >> 1;
    uint8_t g = (row0[1] + row1[1] + 1) >> 1;
    uint8_t r = (row0[2] + row1[2] + 1) >> 1;
    dst_u[0] = RGBToUMatrix(r, g, b, c);
    dst_v[0] = RGBToVMatrix(r, g, b, c);
  }
}
void ARGBToUV444MatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,

View File

@ -1238,7 +1238,7 @@ void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
#endif
#ifdef HAS_RGBTOYMATRIXROW_RVV
static void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {

View File

@ -2901,14 +2901,14 @@ TEST_F(LibYUVConvertTest, TestARGBToUVRow_Any) {
#if defined(HAS_ARGBTOUVROW_AVX2)
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
if (has_avx2) {
ARGBToUVRow_AVX2(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
ARGBToUVRow_Any_AVX2(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
} else {
ARGBToUVRow_C(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
}
#elif defined(HAS_ARGBTOUVROW_NEON)
ARGBToUVRow_NEON(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
ARGBToUVRow_Any_NEON(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
#elif defined(HAS_ARGBTOUVROW_RVV)
ARGBToUVRow_RVV(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
ARGBToUVRow_Any_RVV(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
#else
ARGBToUVRow_C(&orig_argb_pixels[0], 0, &dest_u_opt[0], &dest_v_opt[0], kWidth);
#endif

View File

@ -826,6 +826,7 @@ TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
TESTATOBP(RAW, 1, 3, NV21, 2, 2)
TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV12, 2, 2)