Add 12 bit YUV to 10 bit RGB

Bug: libyuv:843
Change-Id: I0104c8fcaeed09e83d2fd654c6a5e7d41bcb74cf
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2727775
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2021-03-04 12:33:02 -08:00 committed by Frank Barchard
parent 95ff456c33
commit ba033a11e3
14 changed files with 1127 additions and 517 deletions

View File

@ -4,7 +4,9 @@ Formats (FOURCC) supported by libyuv are detailed here.
# Core Formats # Core Formats
There are 2 core formats supported by libyuv - I420 and ARGB. All YUV formats can be converted to/from I420. All RGB formats can be converted to/from ARGB. There are 2 core formats supported by libyuv - I420 and ARGB.
All YUV formats can be converted to/from I420.
All RGB formats can be converted to/from ARGB.
Filtering functions such as scaling and planar functions work on I420 and/or ARGB. Filtering functions such as scaling and planar functions work on I420 and/or ARGB.
@ -109,6 +111,27 @@ The following is extracted from video_common.h as a complete list of formats sup
I444, NV24 and NV42 are full width, full height I444, NV24 and NV42 are full width, full height
I400 and J400 have no chroma channel. I400 and J400 have no chroma channel.
# Color space
The YUV formats start with a letter to specify the color space. e.g. I420
I = BT.601 limited range
J = BT.601 full range (J = JPEG that uses this)
H = BT.709 limited range (H for HD)
F = BT.709 full range (F for Full range)
U = BT.2020 limited range (U for UHD)
V = BT.2020 full range
For YUV to RGB conversions, a matrix can be passed. See also convert_argb.h
# HDR formats
Planar formats with 10 or 12 bits use the following fourcc:
I010, I012, P010, P012 are half width, half height
I210, I212, P210, P212 are half width, full height
I410, I412, P410, P412 are full width, full height
where
I is the color space (see above) and 3 planes: Y, U and V.
P is a biplanar format, similar to NV12 but 16 bits, with the valid bits in the high bits. There is a Y plane and a UV plane.
0, 2 or 4 is the last digit of subsampling: 4:2:0, 4:2:2, or 4:4:4
10 or 12 is the bits per channel. The bits are in the low bits of a 16 bit channel.
# The ARGB FOURCC # The ARGB FOURCC
There are 4 ARGB layouts - ARGB, BGRA, ABGR and RGBA. ARGB is most common by far, used for screen formats, and windows webcam drivers. There are 4 ARGB layouts - ARGB, BGRA, ABGR and RGBA. ARGB is most common by far, used for screen formats, and windows webcam drivers.

View File

@ -1488,6 +1488,34 @@ int I010ToARGBMatrix(const uint16_t* src_y,
int width, int width,
int height); int height);
// Convert 12 bit YUV to AR30 with matrix.
// TODO(fbarchard): Consider passing scale multiplier to I212ToAR30 to
// multiply 12 bit yuv into high bits to allow any number of bits.
LIBYUV_API
int I012ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 12 bit YUV to ARGB with matrix.
LIBYUV_API
int I012ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 422 YUV to ARGB with matrix. // Convert 10 bit 422 YUV to ARGB with matrix.
LIBYUV_API LIBYUV_API
int I210ToARGBMatrix(const uint16_t* src_y, int I210ToARGBMatrix(const uint16_t* src_y,

View File

@ -282,6 +282,8 @@ extern "C" {
#define HAS_HALFMERGEUVROW_SSSE3 #define HAS_HALFMERGEUVROW_SSSE3
#define HAS_I210TOAR30ROW_SSSE3 #define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3 #define HAS_I210TOARGBROW_SSSE3
#define HAS_I212TOAR30ROW_SSSE3
#define HAS_I212TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2 #define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOAR30ROW_SSSE3 #define HAS_I422TOAR30ROW_SSSE3
#define HAS_I410TOAR30ROW_SSSE3 #define HAS_I410TOAR30ROW_SSSE3
@ -320,6 +322,8 @@ extern "C" {
#define HAS_MERGEARGBROW_AVX2 #define HAS_MERGEARGBROW_AVX2
#define HAS_I210TOAR30ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2 #define HAS_I210TOARGBROW_AVX2
#define HAS_I212TOAR30ROW_AVX2
#define HAS_I212TOARGBROW_AVX2
#define HAS_I400TOARGBROW_AVX2 #define HAS_I400TOARGBROW_AVX2
#define HAS_I410TOAR30ROW_AVX2 #define HAS_I410TOAR30ROW_AVX2
#define HAS_I410TOARGBROW_AVX2 #define HAS_I410TOARGBROW_AVX2
@ -721,9 +725,15 @@ struct YuvConstants {
#else #else
// This struct is for Intel color conversion. // This struct is for Intel color conversion.
struct YuvConstants { struct YuvConstants {
#if LIBYUV_UNLIMITED_DATA
uint8_t kUVToB[32];
uint8_t kUVToG[32];
uint8_t kUVToR[32];
#else
int8_t kUVToB[32]; int8_t kUVToB[32];
int8_t kUVToG[32]; int8_t kUVToG[32];
int8_t kUVToR[32]; int8_t kUVToR[32];
#endif
int16_t kUVBiasB[16]; int16_t kUVBiasB[16];
int16_t kUVBiasG[16]; int16_t kUVBiasG[16];
int16_t kUVBiasR[16]; int16_t kUVBiasR[16];
@ -2040,10 +2050,10 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
int depth, int depth,
int width); int width);
void MergeUVRow_16_Any_AVX2(const uint16_t* src_u, void MergeUVRow_16_Any_AVX2(const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
uint16_t* dst_uv, uint16_t* dst_uv,
int depth, int depth,
int width); int width);
void MergeUVRow_16_NEON(const uint16_t* src_u, void MergeUVRow_16_NEON(const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
uint16_t* dst_uv, uint16_t* dst_uv,
@ -2591,6 +2601,18 @@ void I210ToARGBRow_C(const uint16_t* src_y,
uint8_t* rgb_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I212ToAR30Row_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I212ToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_C(const uint16_t* src_y, void I410ToAR30Row_C(const uint16_t* src_y,
const uint16_t* src_u, const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -2617,7 +2639,6 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
uint8_t* rgb_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I444AlphaToARGBRow_C(const uint8_t* src_y, void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
@ -2769,6 +2790,18 @@ void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I212ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I212ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_SSSE3(const uint16_t* src_y, void I410ToAR30Row_SSSE3(const uint16_t* src_y,
const uint16_t* src_u, const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -2813,6 +2846,18 @@ void I210ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30, uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I212ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I212ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_AVX2(const uint16_t* src_y, void I410ToAR30Row_AVX2(const uint16_t* src_y,
const uint16_t* src_u, const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -3081,6 +3126,18 @@ void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I212ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I212ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_SSSE3(const uint16_t* src_y, void I410ToAR30Row_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u, const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -3125,6 +3182,18 @@ void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I212ToARGBRow_Any_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I212ToAR30Row_Any_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_AVX2(const uint16_t* src_y, void I410ToAR30Row_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u, const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -3788,25 +3857,25 @@ void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void P210ToARGBRow_NEON(const uint16_t* y_buf, void P210ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf, const uint16_t* uv_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void P410ToARGBRow_NEON(const uint16_t* y_buf, void P410ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf, const uint16_t* uv_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void P210ToAR30Row_NEON(const uint16_t* y_buf, void P210ToAR30Row_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf, const uint16_t* uv_buf,
uint8_t* dst_ar30, uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void P410ToAR30Row_NEON(const uint16_t* y_buf, void P410ToAR30Row_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf, const uint16_t* uv_buf,
uint8_t* dst_ar30, uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void P210ToARGBRow_Any_NEON(const uint16_t* y_buf, void P210ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf, const uint16_t* uv_buf,
uint8_t* dst_argb, uint8_t* dst_argb,

View File

@ -626,13 +626,13 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
ptrdiff_t dst_stride, ptrdiff_t dst_stride,
int dst_width); int dst_width);
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr, void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width); int dst_width);
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr, void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,
ptrdiff_t dst_stride, ptrdiff_t dst_stride,
int dst_width); int dst_width);
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr, void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width); int dst_width);
@ -682,8 +682,8 @@ void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t* src_ptr,
ptrdiff_t dst_stride, ptrdiff_t dst_stride,
int dst_width); int dst_width);
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr, void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width); int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSSE3(const uint16_t* src_ptr, void ScaleRowUp2_Bilinear_16_Any_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,

View File

@ -888,6 +888,63 @@ int U010ToAB30(const uint16_t* src_y,
&kYuv2020Constants, width, height); &kYuv2020Constants, width, height);
} }
// Convert 12 bit 420 YUV (I012) to AR30 (2:10:10:10) with conversion matrix.
// TODO(fbarchard): Consider passing scale multiplier to I212ToAR30 to
// multiply 12 bit yuv into high bits to allow any number of bits.
LIBYUV_API
int I012ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_u,
                     int src_stride_u,
                     const uint16_t* src_v,
                     int src_stride_v,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int y;
  // Row converter starts as portable C and is upgraded below when a SIMD
  // version is compiled in and supported by the running CPU.
  void (*I212ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
                        const uint16_t* v_buf, uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I212ToAR30Row_C;
  // Reject null planes and non-positive width; height == 0 is also invalid.
  if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last destination
  // row and walk the destination stride backwards.
  if (height < 0) {
    height = -height;
    dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_I212TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // "Any" variant handles unaligned widths; exact variant when width % 8 == 0.
    I212ToAR30Row = I212ToAR30Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I212ToAR30Row = I212ToAR30Row_SSSE3;
    }
  }
#endif
#if defined(HAS_I212TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I212ToAR30Row = I212ToAR30Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I212ToAR30Row = I212ToAR30Row_AVX2;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I212ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
    dst_ar30 += dst_stride_ar30;
    src_y += src_stride_y;
    // 420 subsampling: chroma planes are half height, so advance U and V
    // only every other output row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert 10 bit YUV to ARGB with matrix. // Convert 10 bit YUV to ARGB with matrix.
// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
// multiply 10 bit yuv into high bits to allow any number of bits. // multiply 10 bit yuv into high bits to allow any number of bits.
@ -1061,7 +1118,7 @@ int I410ToAR30Matrix(const uint16_t* src_y,
void (*I410ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf, void (*I410ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
const uint16_t* v_buf, uint8_t* rgb_buf, const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C; I410ToAR30Row_C;
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1260,6 +1317,61 @@ int U010ToABGR(const uint16_t* src_y,
width, height); width, height);
} }
// Convert 12 bit 420 YUV (I012) to 8 bit ARGB with conversion matrix.
LIBYUV_API
int I012ToARGBMatrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_u,
                     int src_stride_u,
                     const uint16_t* src_v,
                     int src_stride_v,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int y;
  // Row converter starts as portable C and is upgraded below when a SIMD
  // version is compiled in and supported by the running CPU.
  void (*I212ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                        const uint16_t* v_buf, uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I212ToARGBRow_C;
  // Reject null planes and non-positive width; height == 0 is also invalid.
  if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last destination
  // row and walk the destination stride backwards.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I212TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // "Any" variant handles unaligned widths; exact variant when width % 8 == 0.
    I212ToARGBRow = I212ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I212ToARGBRow = I212ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I212TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I212ToARGBRow = I212ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I212ToARGBRow = I212ToARGBRow_AVX2;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I212ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // 420 subsampling: chroma planes are half height, so advance U and V
    // only every other output row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert 10 bit 422 YUV to ARGB with matrix. // Convert 10 bit 422 YUV to ARGB with matrix.
LIBYUV_API LIBYUV_API
int I210ToARGBMatrix(const uint16_t* src_y, int I210ToARGBMatrix(const uint16_t* src_y,
@ -1437,7 +1549,7 @@ int I410ToARGBMatrix(const uint16_t* src_y,
void (*I410ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, void (*I410ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
const uint16_t* v_buf, uint8_t* rgb_buf, const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C; I410ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1484,9 +1596,9 @@ int P010ToARGBMatrix(const uint16_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*P210ToARGBRow)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, void (*P210ToARGBRow)(
const struct YuvConstants* yuvconstants, int width) = const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
P210ToARGBRow_C; const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1534,9 +1646,9 @@ int P210ToARGBMatrix(const uint16_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*P210ToARGBRow)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, void (*P210ToARGBRow)(
const struct YuvConstants* yuvconstants, int width) = const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
P210ToARGBRow_C; const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1582,9 +1694,9 @@ int P010ToAR30Matrix(const uint16_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*P210ToAR30Row)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, void (*P210ToAR30Row)(
const struct YuvConstants* yuvconstants, int width) = const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
P210ToAR30Row_C; const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1632,9 +1744,9 @@ int P210ToAR30Matrix(const uint16_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*P210ToAR30Row)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, void (*P210ToAR30Row)(
const struct YuvConstants* yuvconstants, int width) = const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
P210ToAR30Row_C; const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1; return -1;
} }

View File

@ -138,19 +138,47 @@ ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
} }
#ifdef HAS_I210ALPHATOARGBROW_SSSE3 #ifdef HAS_I210ALPHATOARGBROW_SSSE3
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7) ANY41CT(I210AlphaToARGBRow_Any_SSSE3,
I210AlphaToARGBRow_SSSE3,
1,
0,
uint16_t,
2,
4,
7)
#endif #endif
#ifdef HAS_I210ALPHATOARGBROW_AVX2 #ifdef HAS_I210ALPHATOARGBROW_AVX2
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15) ANY41CT(I210AlphaToARGBRow_Any_AVX2,
I210AlphaToARGBRow_AVX2,
1,
0,
uint16_t,
2,
4,
15)
#endif #endif
#ifdef HAS_I410ALPHATOARGBROW_SSSE3 #ifdef HAS_I410ALPHATOARGBROW_SSSE3
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7) ANY41CT(I410AlphaToARGBRow_Any_SSSE3,
I410AlphaToARGBRow_SSSE3,
0,
0,
uint16_t,
2,
4,
7)
#endif #endif
#ifdef HAS_I410ALPHATOARGBROW_AVX2 #ifdef HAS_I410ALPHATOARGBROW_AVX2
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15) ANY41CT(I410AlphaToARGBRow_Any_AVX2,
I410AlphaToARGBRow_AVX2,
0,
0,
uint16_t,
2,
4,
15)
#endif #endif
#undef ANY41CT #undef ANY41CT
@ -382,6 +410,18 @@ ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#ifdef HAS_I210TOARGBROW_MMI #ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7) ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif #endif
#ifdef HAS_I212TOAR30ROW_SSSE3
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_SSSE3
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_AVX2
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I212TOAR30ROW_AVX2
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY31CT #undef ANY31CT
// Any 2 planes to 1. // Any 2 planes to 1.

View File

@ -21,6 +21,11 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// These 2 macros control YUV to RGB using unsigned math to extend range.
// They can be used separately to enable new code and old data (clamped)
// LIBYUV_UNLIMITED_DATA
// LIBYUV_UNLIMITED_CODE
// The following ifdef from row_win makes the C code match the row_win code, // The following ifdef from row_win makes the C code match the row_win code,
// which is 7 bit fixed point. // which is 7 bit fixed point.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
@ -1395,7 +1400,11 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// KR = 0.299; KB = 0.114 // KR = 0.299; KB = 0.114
// U and V contributions to R,G,B. // U and V contributions to R,G,B.
#if LIBYUV_UNLIMITED_DATA
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */ #define UB 128 /* max(128, round(2.018 * 64)) */
#endif
#define UG 25 /* round(0.391 * 64) */ #define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */ #define VG 52 /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */ #define VR 102 /* round(1.596 * 64) */
@ -1444,9 +1453,12 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// B = (Y - 16) * 1.164 + U * 2.112 // B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722 // KR = 0.2126, KB = 0.0722
// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B. // U and V contributions to R,G,B.
#if LIBYUV_UNLIMITED_DATA
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */ #define UB 128 /* max(128, round(2.112 * 64)) */
#endif
#define UG 14 /* round(0.213 * 64) */ #define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */ #define VG 34 /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */ #define VR 115 /* round(1.793 * 64) */
@ -1495,9 +1507,12 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// B = (Y - 16) * 1.164384 + U * 2.14177 // B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593 // KR = 0.2627; KB = 0.0593
// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
// U and V contributions to R,G,B. // U and V contributions to R,G,B.
#if LIBYUV_UNLIMITED_DATA
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */ #define UB 128 /* max(128, round(2.142 * 64)) */
#endif
#define UG 12 /* round(0.187326 * 64) */ #define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */ #define VG 42 /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */ #define VR 107 /* round(1.67867 * 64) */
@ -1545,15 +1560,61 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#undef MAKEYUVCONSTANTS #undef MAKEYUVCONSTANTS
#if LIBYUV_UNLIMITED_DATA
// C reference code that mimics the YUV assembly.
// Reads one 8 bit YUV pixel and clamps the result down to 8 bit RGB
// (note: despite being the "unlimited data" variant, it writes uint8_t).
// Compiled only when LIBYUV_UNLIMITED_DATA is set; in that configuration
// the x86 kUVToB/kUVToR tables are unsigned (see struct YuvConstants), so
// the U-to-B and V-to-R coefficients are negated here before use.
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  // AArch64 constant layout: packed RB and G tables plus a BGR bias triple.
  int ub = yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  // 32-bit ARM layout: V coefficients live at index 4 of the packed tables.
  int ub = yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  // Intel layout: separate per-channel tables; B and R coefficients are
  // stored unsigned here and negated to match the assembly's math.
  int ub = -yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Replicate 8 bit Y to 16 bits (y * 0x0101), apply the Y gain, and keep
  // the high 16 bits as the luma contribution.
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  // Coefficients are scaled by 64 (see the round(x * 64) tables above),
  // hence the final >> 6 before clamping to [0, 255].
  *b = Clamp((int32_t)(y1 + (u * ub) + bb) >> 6);
  *g = Clamp((int32_t)(y1 - (u * ug + v * vg) + bg) >> 6);
  *r = Clamp((int32_t)(y1 + (v * vr) + br) >> 6);
}
#else
// C reference code that mimics the YUV assembly. // C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 8 bit. // Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel8_8(uint8_t y, static __inline void YuvPixel(uint8_t y,
uint8_t u, uint8_t u,
uint8_t v, uint8_t v,
uint8_t* b, uint8_t* b,
uint8_t* g, uint8_t* g,
uint8_t* r, uint8_t* r,
const struct YuvConstants* yuvconstants) { const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) #if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0]; int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0]; int ug = yuvconstants->kUVToG[0];
@ -1584,10 +1645,11 @@ static __inline void YuvPixel8_8(uint8_t y,
#endif #endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6); *b = Clamp((int32_t)(y1 - (u * ub) + bb) >> 6);
*g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6); *g = Clamp((int32_t)(y1 - (u * ug + v * vg) + bg) >> 6);
*r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6); *r = Clamp((int32_t)(y1 - (v * vr) + br) >> 6);
} }
#endif
// Reads 8 bit YUV and leaves result as 16 bit. // Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel8_16(uint8_t y, static __inline void YuvPixel8_16(uint8_t y,
@ -1627,9 +1689,9 @@ static __inline void YuvPixel8_16(uint8_t y,
#endif #endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = (int)(-(u * ub) + y1 + bb); *b = (int)(y1 - (u * ub) + bb);
*g = (int)(-(u * ug + v * vg) + y1 + bg); *g = (int)(y1 - (u * ug + v * vg) + bg);
*r = (int)(-(v * vr) + y1 + br); *r = (int)(y1 - (v * vr) + br);
} }
// C reference code that mimics the YUV 16 bit assembly. // C reference code that mimics the YUV 16 bit assembly.
@ -1678,15 +1740,61 @@ static __inline void YuvPixel10_16(uint16_t y,
*r = (int)(-(v * vr) + y1 + br); *r = (int)(-(v * vr) + y1 + br);
} }
// C reference code that mimics the YUV 16 bit assembly.
// Reads 12 bit YUV and leaves the result as 16 bit fixed point; the caller
// (e.g. YuvPixel12) shifts right by 6 and clamps to produce 8 bit RGB.
// NOTE(review): parameters are int16_t; 12 bit inputs (<= 4095) fit, but a
// full 16 bit value would not — confirm callers only pass 12 bit data.
static __inline void YuvPixel12_16(int16_t y,
                                   int16_t u,
                                   int16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  // AArch64 constant layout: packed RB and G tables plus a BGR bias triple.
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  // 32-bit ARM layout: V coefficients live at index 4 of the packed tables.
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  // Intel layout: separate per-channel coefficient and bias tables.
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Scale 12 bit Y up to 16 bits (<< 4), apply the Y gain, and keep the
  // high 16 bits as the luma contribution.
  uint32_t y1 = (uint32_t)((y << 4) * yg) >> 16;
  // Reduce 12 bit chroma to 8 bit for the 8 bit coefficient tables.
  u = clamp255(u >> 4);
  v = clamp255(v >> 4);
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV 10 bit assembly. // C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB. // Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10_8(uint16_t y, static __inline void YuvPixel10(uint16_t y,
uint16_t u, uint16_t u,
uint16_t v, uint16_t v,
uint8_t* b, uint8_t* b,
uint8_t* g, uint8_t* g,
uint8_t* r, uint8_t* r,
const struct YuvConstants* yuvconstants) { const struct YuvConstants* yuvconstants) {
int b16; int b16;
int g16; int g16;
int r16; int r16;
@ -1696,6 +1804,24 @@ static __inline void YuvPixel10_8(uint16_t y,
*r = Clamp(r16 >> 6); *r = Clamp(r16 >> 6);
} }
// C reference code that mimics the YUV 12 bit assembly.
// Converts one 12 bit YUV pixel and clamps the result down to 8 bit RGB.
static __inline void YuvPixel12(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  // Fixed point intermediates with 6 fractional bits.
  int b_fix, g_fix, r_fix;
  YuvPixel12_16(y, u, v, &b_fix, &g_fix, &r_fix, yuvconstants);
  // Drop the fractional bits and clamp each channel to [0, 255].
  *b = Clamp(b_fix >> 6);
  *g = Clamp(g_fix >> 6);
  *r = Clamp(r_fix >> 6);
}
// C reference code that mimics the YUV 16 bit assembly. // C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 8 bit. // Reads 16 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel16_8(uint16_t y, static __inline void YuvPixel16_8(uint16_t y,
@ -1783,9 +1909,9 @@ static __inline void YuvPixel16_16(uint16_t y,
uint32_t y1 = (uint32_t)(y * yg) >> 16; uint32_t y1 = (uint32_t)(y * yg) >> 16;
u = clamp255(u >> 8); u = clamp255(u >> 8);
v = clamp255(v >> 8); v = clamp255(v >> 8);
*b = (int)(-(u * ub) + y1 + bb); *b = (int)(y1 + -(u * ub) + bb);
*g = (int)(-(u * ug + v * vg) + y1 + bg); *g = (int)(y1 + -(u * ug + v * vg) + bg);
*r = (int)(-(v * vr) + y1 + br); *r = (int)(y1 + -(v * vr) + br);
} }
// C reference code that mimics the YUV assembly. // C reference code that mimics the YUV assembly.
@ -1822,11 +1948,11 @@ void I444ToARGBRow_C(const uint8_t* src_y,
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1; uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1; uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants); yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants); yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_y += 2; src_y += 2;
src_u += 2; src_u += 2;
@ -1834,8 +1960,8 @@ void I444ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -1848,8 +1974,8 @@ void I444ToARGBRow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
src_y += 1; src_y += 1;
src_u += 1; src_u += 1;
@ -1868,11 +1994,11 @@ void I422ToARGBRow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_y += 2; src_y += 2;
src_u += 1; src_u += 1;
@ -1880,8 +2006,8 @@ void I422ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -1895,11 +2021,11 @@ void I210ToARGBRow_C(const uint16_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_y += 2; src_y += 2;
src_u += 1; src_u += 1;
@ -1907,8 +2033,8 @@ void I210ToARGBRow_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -1921,8 +2047,8 @@ void I410ToARGBRow_C(const uint16_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
src_y += 1; src_y += 1;
src_u += 1; src_u += 1;
@ -1940,11 +2066,11 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = clamp255(src_a[0] >> 2); rgb_buf[3] = clamp255(src_a[0] >> 2);
YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = clamp255(src_a[1] >> 2); rgb_buf[7] = clamp255(src_a[1] >> 2);
src_y += 2; src_y += 2;
src_u += 1; src_u += 1;
@ -1953,8 +2079,8 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = clamp255(src_a[0] >> 2); rgb_buf[3] = clamp255(src_a[0] >> 2);
} }
} }
@ -1968,8 +2094,8 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = clamp255(src_a[0] >> 2); rgb_buf[3] = clamp255(src_a[0] >> 2);
src_y += 1; src_y += 1;
src_u += 1; src_u += 1;
@ -1979,6 +2105,33 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
} }
} }
// 12 bit YUV (I212, 4:2:2) to 8 bit ARGB, C reference.
// Each U/V sample is shared by two horizontally adjacent Y samples.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  // Process pixels in pairs; both pixels reuse the same chroma sample.
  for (i = 0; i + 2 <= width; i += 2) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf, rgb_buf + 1, rgb_buf + 2,
               yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;  // Opaque alpha.
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 ARGB pixels.
  }
  // Odd trailing pixel, if any.
  if (width & 1) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf, rgb_buf + 1, rgb_buf + 2,
               yuvconstants);
    rgb_buf[3] = 255;
  }
}
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) { static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
uint32_t ar30; uint32_t ar30;
b = b >> 4; // convert 8 bit 10.6 to 10 bit. b = b >> 4; // convert 8 bit 10.6 to 10 bit.
@ -2018,6 +2171,33 @@ void I210ToAR30Row_C(const uint16_t* src_y,
} }
} }
// 12 bit YUV (I212, 4:2:2) to 10 bit AR30, C reference.
// Each U/V sample is shared by two horizontally adjacent Y samples.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  int b16;
  int g16;
  int r16;
  for (i = 0; i + 2 <= width; i += 2) {
    // First pixel of the pair.
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b16, &g16, &r16, yuvconstants);
    StoreAR30(rgb_buf, b16, g16, r16);
    // Second pixel reuses the same U/V sample.
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b16, &g16, &r16, yuvconstants);
    StoreAR30(rgb_buf + 4, b16, g16, r16);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 AR30 pixels.
  }
  // Odd trailing pixel, if any.
  if (width & 1) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b16, &g16, &r16, yuvconstants);
    StoreAR30(rgb_buf, b16, g16, r16);
  }
}
void I410ToAR30Row_C(const uint16_t* src_y, void I410ToAR30Row_C(const uint16_t* src_y,
const uint16_t* src_u, const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -2038,6 +2218,7 @@ void I410ToAR30Row_C(const uint16_t* src_y,
} }
} }
// P210 has 10 bits in msb of 16 bit NV12 style layout.
void P210ToARGBRow_C(const uint16_t* src_y, void P210ToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_uv, const uint16_t* src_uv,
uint8_t* rgb_buf, uint8_t* rgb_buf,
@ -2163,11 +2344,11 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1; uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1; uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants); yuvconstants);
rgb_buf[3] = src_a[0]; rgb_buf[3] = src_a[0];
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants); yuvconstants);
rgb_buf[7] = src_a[1]; rgb_buf[7] = src_a[1];
src_y += 2; src_y += 2;
src_u += 2; src_u += 2;
@ -2176,8 +2357,8 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0]; rgb_buf[3] = src_a[0];
} }
} }
@ -2191,8 +2372,8 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0]; rgb_buf[3] = src_a[0];
src_y += 1; src_y += 1;
src_u += 1; src_u += 1;
@ -2212,11 +2393,11 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0]; rgb_buf[3] = src_a[0];
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = src_a[1]; rgb_buf[7] = src_a[1];
src_y += 2; src_y += 2;
src_u += 1; src_u += 1;
@ -2225,8 +2406,8 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0]; rgb_buf[3] = src_a[0];
} }
} }
@ -2239,18 +2420,18 @@ void I422ToRGB24Row_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4, YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants); rgb_buf + 5, yuvconstants);
src_y += 2; src_y += 2;
src_u += 1; src_u += 1;
src_v += 1; src_v += 1;
rgb_buf += 6; // Advance 2 pixels. rgb_buf += 6; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
} }
} }
@ -2268,8 +2449,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
uint8_t r1; uint8_t r1;
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 4; b0 = b0 >> 4;
g0 = g0 >> 4; g0 = g0 >> 4;
r0 = r0 >> 4; r0 = r0 >> 4;
@ -2284,7 +2465,7 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
dst_argb4444 += 4; // Advance 2 pixels. dst_argb4444 += 4; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 4; b0 = b0 >> 4;
g0 = g0 >> 4; g0 = g0 >> 4;
r0 = r0 >> 4; r0 = r0 >> 4;
@ -2306,8 +2487,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
uint8_t r1; uint8_t r1;
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3; b0 = b0 >> 3;
g0 = g0 >> 3; g0 = g0 >> 3;
r0 = r0 >> 3; r0 = r0 >> 3;
@ -2322,7 +2503,7 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
dst_argb1555 += 4; // Advance 2 pixels. dst_argb1555 += 4; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3; b0 = b0 >> 3;
g0 = g0 >> 3; g0 = g0 >> 3;
r0 = r0 >> 3; r0 = r0 >> 3;
@ -2344,8 +2525,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
uint8_t r1; uint8_t r1;
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3; b0 = b0 >> 3;
g0 = g0 >> 2; g0 = g0 >> 2;
r0 = r0 >> 3; r0 = r0 >> 3;
@ -2360,7 +2541,7 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
dst_rgb565 += 4; // Advance 2 pixels. dst_rgb565 += 4; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3; b0 = b0 >> 3;
g0 = g0 >> 2; g0 = g0 >> 2;
r0 = r0 >> 3; r0 = r0 >> 3;
@ -2375,19 +2556,19 @@ void NV12ToARGBRow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5, YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_y += 2; src_y += 2;
src_uv += 2; src_uv += 2;
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -2399,19 +2580,19 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5, YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_y += 2; src_y += 2;
src_vu += 2; src_vu += 2;
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -2423,17 +2604,17 @@ void NV12ToRGB24Row_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4, YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants); rgb_buf + 5, yuvconstants);
src_y += 2; src_y += 2;
src_uv += 2; src_uv += 2;
rgb_buf += 6; // Advance 2 pixels. rgb_buf += 6; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
} }
} }
@ -2444,17 +2625,17 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4, YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants); rgb_buf + 5, yuvconstants);
src_y += 2; src_y += 2;
src_vu += 2; src_vu += 2;
rgb_buf += 6; // Advance 2 pixels. rgb_buf += 6; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
} }
} }
@ -2471,8 +2652,8 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
uint8_t r1; uint8_t r1;
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants); YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3; b0 = b0 >> 3;
g0 = g0 >> 2; g0 = g0 >> 2;
r0 = r0 >> 3; r0 = r0 >> 3;
@ -2486,7 +2667,7 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
dst_rgb565 += 4; // Advance 2 pixels. dst_rgb565 += 4; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3; b0 = b0 >> 3;
g0 = g0 >> 2; g0 = g0 >> 2;
r0 = r0 >> 3; r0 = r0 >> 3;
@ -2500,18 +2681,18 @@ void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel8_8(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5, YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_yuy2 += 4; src_yuy2 += 4;
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -2522,18 +2703,18 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
YuvPixel8_8(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5, YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants); rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255; rgb_buf[7] = 255;
src_uyvy += 4; src_uyvy += 4;
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants); rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255; rgb_buf[3] = 255;
} }
} }
@ -2546,11 +2727,11 @@ void I422ToRGBARow_C(const uint8_t* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants); rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255; rgb_buf[0] = 255;
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6, YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
rgb_buf + 7, yuvconstants); rgb_buf + 7, yuvconstants);
rgb_buf[4] = 255; rgb_buf[4] = 255;
src_y += 2; src_y += 2;
src_u += 1; src_u += 1;
@ -2558,8 +2739,8 @@ void I422ToRGBARow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels. rgb_buf += 8; // Advance 2 pixels.
} }
if (width & 1) { if (width & 1) {
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants); rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255; rgb_buf[0] = 255;
} }
} }

View File

@ -2001,6 +2001,19 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
"packuswb %%xmm5,%%xmm5 \n" \ "packuswb %%xmm5,%%xmm5 \n" \
"lea 0x10(%[a_buf]),%[a_buf] \n" "lea 0x10(%[a_buf]),%[a_buf] \n"
// Read 4 UV from 422 12 bit, upsample to 8 UV
// Loads 4 U and 4 V samples (12 bit values held in uint16_t), interleaves
// them (punpcklwd), arithmetic-shifts right by 4 to reduce the chroma to
// 8 bit, packs to bytes (packuswb) and duplicates each UV pair so it covers
// the two Y samples it belongs to. Then loads 8 Y values and shifts them
// left by 4 (12 -> 16 bit) for the fixed point YUVTORGB stage.
// NOTE(review): assumes the 12 bit samples occupy the low 12 bits of each
// uint16_t -- confirm against the I212 format definition.
#define READYUV212 \
  "movq (%[u_buf]),%%xmm0 \n" \
  "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
  "lea 0x8(%[u_buf]),%[u_buf] \n" \
  "punpcklwd %%xmm1,%%xmm0 \n" \
  "psraw $0x4,%%xmm0 \n" \
  "packuswb %%xmm0,%%xmm0 \n" \
  "punpcklwd %%xmm0,%%xmm0 \n" \
  "movdqu (%[y_buf]),%%xmm4 \n" \
  "psllw $0x4,%%xmm4 \n" \
  "lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
#define READYUVA422 \ #define READYUVA422 \
"movd (%[u_buf]),%%xmm0 \n" \ "movd (%[u_buf]),%%xmm0 \n" \
@ -2398,6 +2411,36 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
); );
} }
// 12 bit YUV to ARGB
// SSSE3 row function: converts 8 pixels of 12 bit 4:2:2 YUV (I212) per loop
// iteration to 8 bit ARGB using the coefficients in |yuvconstants|.
// NOTE(review): the loop ("sub $0x8" / "jg") processes width in groups of
// 8 -- confirm callers handle any remainder (e.g. via the ANY wrappers).
void OMITFP I212ToARGBRow_SSSE3(const uint16_t* y_buf,
                                const uint16_t* u_buf,
                                const uint16_t* v_buf,
                                uint8_t* dst_argb,
                                const struct YuvConstants* yuvconstants,
                                int width) {
  asm volatile (
    YUVTORGB_SETUP(yuvconstants)
    // Keep v_buf as an offset from u_buf so one register indexes both planes.
    "sub %[u_buf],%[v_buf] \n"
    // xmm5 = all ones; supplies the 0xff alpha bytes for STOREARGB.
    "pcmpeqb %%xmm5,%%xmm5 \n"

  LABELALIGN
    "1: \n"
    READYUV212
    YUVTORGB(yuvconstants)
    STOREARGB
    "sub $0x8,%[width] \n"
    "jg 1b \n"
  : [y_buf]"+r"(y_buf), // %[y_buf]
    [u_buf]"+r"(u_buf), // %[u_buf]
    [v_buf]"+r"(v_buf), // %[v_buf]
    [dst_argb]"+r"(dst_argb), // %[dst_argb]
    [width]"+rm"(width) // %[width]
  : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
  : "memory", "cc", YUVTORGB_REGS
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
  );
}
// 10 bit YUV to AR30 // 10 bit YUV to AR30
void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf, void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf, const uint16_t* u_buf,
@ -2433,6 +2476,41 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
); );
} }
// 12 bit YUV to AR30
// SSSE3 row function: converts 8 pixels of 12 bit 4:2:2 YUV (I212) per loop
// iteration to 10 bit AR30 (2 bit alpha + 10 bit RGB) via the 16 bit
// YUVTORGB16 path and STOREAR30.
void OMITFP I212ToAR30Row_SSSE3(const uint16_t* y_buf,
                                const uint16_t* u_buf,
                                const uint16_t* v_buf,
                                uint8_t* dst_ar30,
                                const struct YuvConstants* yuvconstants,
                                int width) {
  asm volatile (
    YUVTORGB_SETUP(yuvconstants)
    // Keep v_buf as an offset from u_buf so one register indexes both planes.
    "sub %[u_buf],%[v_buf] \n"
    // xmm5: 0xffff -> 0x0003 -> 0x0030 per word; the alpha field pattern.
    "pcmpeqb %%xmm5,%%xmm5 \n"
    "psrlw $14,%%xmm5 \n"
    "psllw $4,%%xmm5 \n" // 2 alpha bits
    "pxor %%xmm6,%%xmm6 \n"
    "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
    "psrlw $6,%%xmm7 \n" // 1023 for max
  LABELALIGN
    "1: \n"
    READYUV212
    YUVTORGB16(yuvconstants)
    STOREAR30
    "sub $0x8,%[width] \n"
    "jg 1b \n"
  : [y_buf]"+r"(y_buf), // %[y_buf]
    [u_buf]"+r"(u_buf), // %[u_buf]
    [v_buf]"+r"(v_buf), // %[v_buf]
    [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
    [width]"+rm"(width) // %[width]
  : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
  : "memory", "cc", YUVTORGB_REGS
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
// 10 bit YUV to ARGB // 10 bit YUV to ARGB
void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf, void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf, const uint16_t* u_buf,
@ -2443,16 +2521,16 @@ void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV410 READYUV410
YUVTORGB(yuvconstants) YUVTORGB(yuvconstants)
STOREARGB STOREARGB
"sub $0x8,%[width] \n" "sub $0x8,%[width] \n"
"jg 1b \n" "jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf] [u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf] [v_buf]"+r"(v_buf), // %[v_buf]
@ -2474,29 +2552,26 @@ void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
int width) { int width) {
asm volatile( asm volatile(
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(
"sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN "1: \n" READYUVA210
"1: \n" YUVTORGB(yuvconstants) STOREARGB
READYUVA210 "subl $0x8,%[width] \n"
YUVTORGB(yuvconstants) "jg 1b \n"
STOREARGB : [y_buf] "+r"(y_buf), // %[y_buf]
"subl $0x8,%[width] \n" [u_buf] "+r"(u_buf), // %[u_buf]
"jg 1b \n" [v_buf] "+r"(v_buf), // %[v_buf]
: [y_buf] "+r"(y_buf), // %[y_buf] [a_buf] "+r"(a_buf),
[u_buf] "+r"(u_buf), // %[u_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf),
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__) #if defined(__i386__)
[width]"+m"(width) // %[width] [width] "+m"(width) // %[width]
#else #else
[width]"+rm"(width) // %[width] [width] "+rm"(width) // %[width]
#endif #endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants] : [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
); "xmm5");
} }
#endif #endif
@ -2511,29 +2586,26 @@ void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
int width) { int width) {
asm volatile( asm volatile(
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(
"sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN "1: \n" READYUVA410
"1: \n" YUVTORGB(yuvconstants) STOREARGB
READYUVA410 "subl $0x8,%[width] \n"
YUVTORGB(yuvconstants) "jg 1b \n"
STOREARGB : [y_buf] "+r"(y_buf), // %[y_buf]
"subl $0x8,%[width] \n" [u_buf] "+r"(u_buf), // %[u_buf]
"jg 1b \n" [v_buf] "+r"(v_buf), // %[v_buf]
: [y_buf] "+r"(y_buf), // %[y_buf] [a_buf] "+r"(a_buf),
[u_buf] "+r"(u_buf), // %[u_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf),
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__) #if defined(__i386__)
[width]"+m"(width) // %[width] [width] "+m"(width) // %[width]
#else #else
[width]"+rm"(width) // %[width] [width] "+rm"(width) // %[width]
#endif #endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants] : [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
); "xmm5");
} }
#endif #endif
@ -2547,21 +2619,21 @@ void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n" "psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits "psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n" "pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max "psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV410 READYUV410
YUVTORGB16(yuvconstants) YUVTORGB16(yuvconstants)
STOREAR30 STOREAR30
"sub $0x8,%[width] \n" "sub $0x8,%[width] \n"
"jg 1b \n" "jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf] [u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf] [v_buf]"+r"(v_buf), // %[v_buf]
@ -2729,26 +2801,22 @@ void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile ( asm volatile(
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(
"pcmpeqb %%xmm5,%%xmm5 \n" yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN "1: \n" READP210
"1: \n" YUVTORGB(yuvconstants) STOREARGB
READP210 "sub $0x8,%[width] \n"
YUVTORGB(yuvconstants) "jg 1b \n"
STOREARGB : [y_buf] "+r"(y_buf), // %[y_buf]
"sub $0x8,%[width] \n" [uv_buf] "+r"(uv_buf), // %[u_buf]
"jg 1b \n" [dst_argb] "+r"(dst_argb), // %[dst_argb]
: [y_buf] "+r"(y_buf), // %[y_buf] [width] "+rm"(width) // %[width]
[uv_buf] "+r"(uv_buf), // %[u_buf] : [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
[dst_argb] "+r"(dst_argb), // %[dst_argb] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
[width] "+rm"(width) // %[width] "xmm5");
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
} }
void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf, void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
@ -2756,25 +2824,22 @@ void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile ( asm volatile(
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(
"pcmpeqb %%xmm5,%%xmm5 \n" yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN LABELALIGN "1: \n" READP410
"1: \n" YUVTORGB(yuvconstants) STOREARGB
READP410 "sub $0x8,%[width] \n"
YUVTORGB(yuvconstants) "jg 1b \n"
STOREARGB : [y_buf] "+r"(y_buf), // %[y_buf]
"sub $0x8,%[width] \n" [uv_buf] "+r"(uv_buf), // %[u_buf]
"jg 1b \n" [dst_argb] "+r"(dst_argb), // %[dst_argb]
: [y_buf] "+r"(y_buf), // %[y_buf] [width] "+rm"(width) // %[width]
[uv_buf] "+r"(uv_buf), // %[u_buf] : [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
[dst_argb] "+r"(dst_argb), // %[dst_argb] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
[width] "+rm"(width) // %[width] "xmm5");
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
"xmm5");
} }
void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf, void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
@ -2785,20 +2850,20 @@ void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n" "psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits "psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n" "pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max "psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READP210 READP210
YUVTORGB16(yuvconstants) YUVTORGB16(yuvconstants)
STOREAR30 STOREAR30
"sub $0x8,%[width] \n" "sub $0x8,%[width] \n"
"jg 1b \n" "jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
@ -2817,20 +2882,20 @@ void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP(yuvconstants) YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n" "psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits "psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n" "pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max "psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READP410 READP410
YUVTORGB16(yuvconstants) YUVTORGB16(yuvconstants)
STOREAR30 STOREAR30
"sub $0x8,%[width] \n" "sub $0x8,%[width] \n"
"jg 1b \n" "jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
@ -2948,6 +3013,21 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpsllw $6,%%ymm4,%%ymm4 \n" \ "vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" "lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 212 12 bit, upsample to 16 UV
#define READYUV212_AVX2 \
"vmovdqu (%[u_buf]),%%xmm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
"vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
"vpsraw $0x4,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $0x4,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from 410. With 16 Alpha. // Read 16 UV from 410. With 16 Alpha.
#define READYUVA410_AVX2 \ #define READYUVA410_AVX2 \
"vmovdqu (%[u_buf]),%%ymm0 \n" \ "vmovdqu (%[u_buf]),%%ymm0 \n" \
@ -3295,6 +3375,41 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
} }
#endif // HAS_I210TOARGBROW_AVX2 #endif // HAS_I210TOARGBROW_AVX2
#if defined(HAS_I212TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP I212ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV212_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I212TOARGBROW_AVX2
#if defined(HAS_I210TOAR30ROW_AVX2) #if defined(HAS_I210TOAR30ROW_AVX2)
// 16 pixels // 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
@ -3335,6 +3450,46 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
} }
#endif // HAS_I210TOAR30ROW_AVX2 #endif // HAS_I210TOAR30ROW_AVX2
#if defined(HAS_I212TOAR30ROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
void OMITFP I212ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READYUV212_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I212TOAR30ROW_AVX2
#if defined(HAS_I410TOARGBROW_AVX2) #if defined(HAS_I410TOARGBROW_AVX2)
// 16 pixels // 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes). // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
@ -3347,17 +3502,17 @@ void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV410_AVX2 READYUV410_AVX2
YUVTORGB_AVX2(yuvconstants) YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2 STOREARGB_AVX2
"sub $0x10,%[width] \n" "sub $0x10,%[width] \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf] [u_buf]"+r"(u_buf), // %[u_buf]
@ -3383,32 +3538,28 @@ void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
int width) { int width) {
asm volatile( asm volatile(
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(
"sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN "1: \n" READYUVA210_AVX2
"1: \n" YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2
READYUVA210_AVX2 "subl $0x10,%[width] \n"
YUVTORGB_AVX2(yuvconstants) "jg 1b \n"
STOREARGB_AVX2 "vzeroupper \n"
"subl $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf] "+r"(y_buf), // %[y_buf] : [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf] [u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf] [v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf), // %[a_buf] [a_buf] "+r"(a_buf), // %[a_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb] [dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__) #if defined(__i386__)
[width]"+m"(width) // %[width] [width] "+m"(width) // %[width]
#else #else
[width]"+rm"(width) // %[width] [width] "+rm"(width) // %[width]
#endif #endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants] : [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5" "xmm4", "xmm5");
);
} }
#endif // HAS_I210TOARGBROW_AVX2 #endif // HAS_I210TOARGBROW_AVX2
@ -3424,32 +3575,28 @@ void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
int width) { int width) {
asm volatile( asm volatile(
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(
"sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN LABELALIGN "1: \n" READYUVA410_AVX2
"1: \n" YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2
READYUVA410_AVX2 "subl $0x10,%[width] \n"
YUVTORGB_AVX2(yuvconstants) "jg 1b \n"
STOREARGB_AVX2 "vzeroupper \n"
"subl $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf] "+r"(y_buf), // %[y_buf] : [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf] [u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf] [v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf), // %[a_buf] [a_buf] "+r"(a_buf), // %[a_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb] [dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__) #if defined(__i386__)
[width]"+m"(width) // %[width] [width] "+m"(width) // %[width]
#else #else
[width]"+rm"(width) // %[width] [width] "+rm"(width) // %[width]
#endif #endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants] : [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5" "xmm4", "xmm5");
);
} }
#endif // HAS_I410TOARGBROW_AVX2 #endif // HAS_I410TOARGBROW_AVX2
@ -3465,23 +3612,23 @@ void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n" "vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n" "vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV410_AVX2 READYUV410_AVX2
YUVTORGB16_AVX2(yuvconstants) YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2 STOREAR30_AVX2
"sub $0x10,%[width] \n" "sub $0x10,%[width] \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf] [u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf] [v_buf]"+r"(v_buf), // %[v_buf]
@ -3764,16 +3911,16 @@ void OMITFP P210ToARGBRow_AVX2(const uint16_t* y_buf,
// clang-format off // clang-format off
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READP210_AVX2 READP210_AVX2
YUVTORGB_AVX2(yuvconstants) YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2 STOREARGB_AVX2
"sub $0x10,%[width] \n" "sub $0x10,%[width] \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb] [dst_argb]"+r"(dst_argb), // %[dst_argb]
@ -3797,16 +3944,16 @@ void OMITFP P410ToARGBRow_AVX2(const uint16_t* y_buf,
// clang-format off // clang-format off
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READP410_AVX2 READP410_AVX2
YUVTORGB_AVX2(yuvconstants) YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2 STOREARGB_AVX2
"sub $0x10,%[width] \n" "sub $0x10,%[width] \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb] [dst_argb]"+r"(dst_argb), // %[dst_argb]
@ -3830,22 +3977,22 @@ void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n" "vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n" "vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READP210_AVX2 READP210_AVX2
YUVTORGB16_AVX2(yuvconstants) YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2 STOREAR30_AVX2
"sub $0x10,%[width] \n" "sub $0x10,%[width] \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
@ -3868,22 +4015,22 @@ void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n" "vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n" "vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READP410_AVX2 READP410_AVX2
YUVTORGB16_AVX2(yuvconstants) YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2 STOREAR30_AVX2
"sub $0x10,%[width] \n" "sub $0x10,%[width] \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf] : [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
@ -4409,33 +4556,33 @@ void SplitUVRow_16_AVX2(const uint16_t* src_uv,
depth = 16 - depth; depth = 16 - depth;
// clang-format off // clang-format off
asm volatile ( asm volatile (
"vmovd %4,%%xmm3 \n" "vmovd %4,%%xmm3 \n"
"vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
"vbroadcastss %%xmm3,%%xmm3 \n" "vbroadcastss %%xmm3,%%xmm3 \n"
"vbroadcastf128 %5,%%ymm4 \n" "vbroadcastf128 %5,%%ymm4 \n"
"sub %1,%2 \n" "sub %1,%2 \n"
// 16 pixels per loop. // 16 pixels per loop.
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu (%0),%%ymm0 \n" "vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x20(%0),%%ymm1 \n"
"add $0x40,%0 \n" "add $0x40,%0 \n"
"vpsrlw %%xmm3,%%ymm0,%%ymm0 \n" "vpsrlw %%xmm3,%%ymm0,%%ymm0 \n"
"vpsrlw %%xmm3,%%ymm1,%%ymm1 \n" "vpsrlw %%xmm3,%%ymm1,%%ymm1 \n"
"vpshufb %%ymm4,%%ymm0,%%ymm0 \n" "vpshufb %%ymm4,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm4,%%ymm1,%%ymm1 \n" "vpshufb %%ymm4,%%ymm1,%%ymm1 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm1,%%ymm1 \n" "vpermq $0xd8,%%ymm1,%%ymm1 \n"
"vextractf128 $0x0,%%ymm0,(%1) \n" "vextractf128 $0x0,%%ymm0,(%1) \n"
"vextractf128 $0x0,%%ymm1,0x10(%1) \n" "vextractf128 $0x0,%%ymm1,0x10(%1) \n"
"vextractf128 $0x1,%%ymm0,(%1,%2) \n" "vextractf128 $0x1,%%ymm0,(%1,%2) \n"
"vextractf128 $0x1,%%ymm1,0x10(%1,%2) \n" "vextractf128 $0x1,%%ymm1,0x10(%1,%2) \n"
"add $0x20,%1 \n" "add $0x20,%1 \n"
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: "+r"(src_uv), // %0 : "+r"(src_uv), // %0
"+r"(dst_u), // %1 "+r"(dst_u), // %1
"+r"(dst_v), // %2 "+r"(dst_v), // %2
@ -4499,24 +4646,24 @@ void DivideRow_16_AVX2(const uint16_t* src_y,
int width) { int width) {
// clang-format off // clang-format off
asm volatile ( asm volatile (
"vmovd %3,%%xmm3 \n" "vmovd %3,%%xmm3 \n"
"vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
"vbroadcastss %%xmm3,%%ymm3 \n" "vbroadcastss %%xmm3,%%ymm3 \n"
"sub %0,%1 \n" "sub %0,%1 \n"
// 32 pixels per loop. // 32 pixels per loop.
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu (%0),%%ymm0 \n" "vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x20(%0),%%ymm1 \n"
"vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n"
"vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
"vmovdqu %%ymm0,(%0,%1) \n" "vmovdqu %%ymm0,(%0,%1) \n"
"vmovdqu %%ymm1,0x20(%0,%1) \n" "vmovdqu %%ymm1,0x20(%0,%1) \n"
"add $0x40,%0 \n" "add $0x40,%0 \n"
"sub $0x20,%2 \n" "sub $0x20,%2 \n"
"jg 1b \n" "jg 1b \n"
"vzeroupper \n" "vzeroupper \n"
: "+r"(src_y), // %0 : "+r"(src_y), // %0
"+r"(dst_y), // %1 "+r"(dst_y), // %1
"+r"(width), // %2 "+r"(width), // %2
@ -5173,7 +5320,7 @@ void SplitARGBRow_SSSE3(const uint8_t* src_argb,
#if defined(__i386__) #if defined(__i386__)
"+m"(width) // %5 "+m"(width) // %5
#else #else
"+rm"(width) // %5 "+rm"(width) // %5
#endif #endif
: "m"(kShuffleMaskARGBSplit) // %6 : "m"(kShuffleMaskARGBSplit) // %6
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
@ -5264,7 +5411,7 @@ void SplitARGBRow_AVX2(const uint8_t* src_argb,
#if defined(__i386__) #if defined(__i386__)
"+m"(width) // %5 "+m"(width) // %5
#else #else
"+rm"(width) // %5 "+rm"(width) // %5
#endif #endif
: "m"(kShuffleMaskARGBSplit), // %6 : "m"(kShuffleMaskARGBSplit), // %6
"m"(kShuffleMaskARGBPermute) // %7 "m"(kShuffleMaskARGBPermute) // %7
@ -7981,7 +8128,7 @@ void HalfFloatRow_AVX2(const uint16_t* src,
#if defined(__x86_64__) #if defined(__x86_64__)
: "x"(scale) // %3 : "x"(scale) // %3
#else #else
: "m"(scale) // %3 : "m"(scale) // %3
#endif #endif
: "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5");
} }
@ -8019,7 +8166,7 @@ void HalfFloatRow_F16C(const uint16_t* src,
#if defined(__x86_64__) #if defined(__x86_64__)
: "x"(scale) // %3 : "x"(scale) // %3
#else #else
: "m"(scale) // %3 : "m"(scale) // %3
#endif #endif
: "memory", "cc", "xmm2", "xmm3", "xmm4"); : "memory", "cc", "xmm2", "xmm3", "xmm4");
} }

View File

@ -1619,7 +1619,7 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
uint16_t* dst_ptr) { uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C; ScaleRowUp2_Bilinear_16_Any_C;
int x; int x;
// This function can only scale up by 2 times. // This function can only scale up by 2 times.

View File

@ -3167,67 +3167,11 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
} }
#endif // HAS_ABGRTOAR30ROW_AVX2 #endif // HAS_ABGRTOAR30ROW_AVX2
// TODO(fbarchard): Fix clamping issue affected by U channel. // Provide matrix wrappers for 12 bit YUV
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ #define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \ I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ #define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Unaligned, +, 1, 1, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \ #define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j) I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
@ -3254,43 +3198,105 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \ #define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j) I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 10) // TODO(fbarchard): Fix clamping issue affected by U channel.
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 10) #define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 10) BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 10) TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
TESTPLANAR16TOB(U010, 2, 2, ARGB, 4, 4, 1, 10) const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
TESTPLANAR16TOB(U010, 2, 2, ABGR, 4, 4, 1, 10) const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
TESTPLANAR16TOB(I210, 2, 1, ARGB, 4, 4, 1, 10) const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
TESTPLANAR16TOB(I210, 2, 1, ABGR, 4, 4, 1, 10) const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 10) const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 10) const int kBpc = 2; \
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 10) align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 10) align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
TESTPLANAR16TOB(I410, 1, 1, ARGB, 4, 4, 1, 10) align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
TESTPLANAR16TOB(I410, 1, 1, ABGR, 4, 4, 1, 10) align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
TESTPLANAR16TOB(H410, 1, 1, ARGB, 4, 4, 1, 10) align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
TESTPLANAR16TOB(H410, 1, 1, ABGR, 4, 4, 1, 10) for (int i = 0; i < kWidth * kHeight; ++i) { \
TESTPLANAR16TOB(U410, 1, 1, ARGB, 4, 4, 1, 10) reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
TESTPLANAR16TOB(U410, 1, 1, ABGR, 4, 4, 1, 10) } \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
BPP_B, ALIGN, YALIGN) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
ALIGN, YALIGN, benchmark_width_ - 4, _Any, +, 0, 0) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 1, 1) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST #ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I410, 1, 1, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(I410, 1, 1, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(H410, 1, 1, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(H410, 1, 1, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, AR30, 4, 4, 1, 10) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, AB30, 4, 4, 1, 10) TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
#endif #endif
#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ #define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \

View File

@ -302,7 +302,7 @@ TEST_FACTOR(3, 1, 3)
TEST_SCALETO1(name, width, height, Bilinear, 3) TEST_SCALETO1(name, width, height, Bilinear, 3)
TEST_SCALETO(ARGBScale, 1, 1) TEST_SCALETO(ARGBScale, 1, 1)
TEST_SCALETO(ARGBScale, 256, 144) /* 128x72 * 2 */ TEST_SCALETO(ARGBScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(ARGBScale, 320, 240) TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 569, 480) TEST_SCALETO(ARGBScale, 569, 480)
TEST_SCALETO(ARGBScale, 640, 360) TEST_SCALETO(ARGBScale, 640, 360)

View File

@ -1025,7 +1025,7 @@ TEST_FACTOR(3, 1, 3, 0)
#endif #endif
TEST_SCALETO(Scale, 1, 1) TEST_SCALETO(Scale, 1, 1)
TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */ TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(Scale, 320, 240) TEST_SCALETO(Scale, 320, 240)
TEST_SCALETO(Scale, 569, 480) TEST_SCALETO(Scale, 569, 480)
TEST_SCALETO(Scale, 640, 360) TEST_SCALETO(Scale, 640, 360)

View File

@ -166,7 +166,7 @@ TEST_FACTOR(3, 1, 3)
TEST_SCALETO1(name, width, height, Bilinear, 3) TEST_SCALETO1(name, width, height, Bilinear, 3)
TEST_SCALETO(UVScale, 1, 1) TEST_SCALETO(UVScale, 1, 1)
TEST_SCALETO(UVScale, 256, 144) /* 128x72 * 2 */ TEST_SCALETO(UVScale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(UVScale, 320, 240) TEST_SCALETO(UVScale, 320, 240)
TEST_SCALETO(UVScale, 569, 480) TEST_SCALETO(UVScale, 569, 480)
TEST_SCALETO(UVScale, 640, 360) TEST_SCALETO(UVScale, 640, 360)

View File

@ -26,9 +26,13 @@ unsigned int fastrand_seed = 0xfb;
ABSL_FLAG(int32_t, libyuv_width, 0, "width of test image."); ABSL_FLAG(int32_t, libyuv_width, 0, "width of test image.");
ABSL_FLAG(int32_t, libyuv_height, 0, "height of test image."); ABSL_FLAG(int32_t, libyuv_height, 0, "height of test image.");
ABSL_FLAG(int32_t, libyuv_repeat, 0, "number of times to repeat test."); ABSL_FLAG(int32_t, libyuv_repeat, 0, "number of times to repeat test.");
ABSL_FLAG(int32_t, libyuv_flags, 0, ABSL_FLAG(int32_t,
libyuv_flags,
0,
"cpu flags for reference code. 1 = C, -1 = SIMD"); "cpu flags for reference code. 1 = C, -1 = SIMD");
ABSL_FLAG(int32_t, libyuv_cpu_info, 0, ABSL_FLAG(int32_t,
libyuv_cpu_info,
0,
"cpu flags for benchmark code. 1 = C, -1 = SIMD"); "cpu flags for benchmark code. 1 = C, -1 = SIMD");
#else #else
// Disable command line parameters if absl/flags disabled. // Disable command line parameters if absl/flags disabled.