mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Add 12 bit YUV to 10 bit RGB
Bug: libyuv:843 Change-Id: I0104c8fcaeed09e83d2fd654c6a5e7d41bcb74cf Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2727775 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
95ff456c33
commit
ba033a11e3
@ -4,7 +4,9 @@ Formats (FOURCC) supported by libyuv are detailed here.
|
||||
|
||||
# Core Formats
|
||||
|
||||
There are 2 core formats supported by libyuv - I420 and ARGB. All YUV formats can be converted to/from I420. All RGB formats can be converted to/from ARGB.
|
||||
There are 2 core formats supported by libyuv - I420 and ARGB.
|
||||
All YUV formats can be converted to/from I420.
|
||||
All RGB formats can be converted to/from ARGB.
|
||||
|
||||
Filtering functions such as scaling and planar functions work on I420 and/or ARGB.
|
||||
|
||||
@ -109,6 +111,27 @@ The following is extracted from video_common.h as a complete list of formats sup
|
||||
I444, NV24 and NV42 are full width, full height
|
||||
I400 and J400 have no chroma channel.
|
||||
|
||||
# Color space
|
||||
The YUV formats start with a letter to specify the color space. e.g. I420
|
||||
I = BT.601 limited range
|
||||
J = BT.601 full range (J = JPEG that uses this)
|
||||
H = BT.709 limited range (H for HD)
|
||||
F = BT.709 full range (F for Full range)
|
||||
U = BT.2020 limited range (U for UHD)
|
||||
V = BT.2020 full range
|
||||
For YUV to RGB conversions, a matrix can be passed. See also convert_argb.h
|
||||
|
||||
# HDR formats
|
||||
Planar formats with 10 or 12 bits use the following fourcc:
|
||||
I010, I012, P010, P012 are half width, half height
|
||||
I210, I212, P210, P212 are half width, full height
|
||||
I410, I412, P410, P412 are full width, full height
|
||||
where
|
||||
I is the color space (see above) and 3 planes: Y, U and V.
|
||||
P is a biplanar format, similar to NV12 but 16 bits, with the valid bits in the high bits. There is a Y plane and a UV plane.
|
||||
0, 2 or 4 is the last digit of subsampling: 4:2:0, 4:2:2, or 4:4:4
|
||||
10 or 12 is the bits per channel. For I formats the bits are in the low bits of a 16 bit channel; for P formats they are in the high bits (see above).
|
||||
|
||||
# The ARGB FOURCC
|
||||
|
||||
There are 4 ARGB layouts - ARGB, BGRA, ABGR and RGBA. ARGB is most common by far, used for screen formats, and windows webcam drivers.
|
||||
|
||||
@ -1488,6 +1488,34 @@ int I010ToARGBMatrix(const uint16_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert 12 bit YUV to AR30 with matrix.
|
||||
LIBYUV_API
|
||||
int I012ToAR30Matrix(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert 12 bit YUV to ARGB with matrix.
|
||||
LIBYUV_API
|
||||
int I012ToARGBMatrix(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert 10 bit 422 YUV to ARGB with matrix.
|
||||
LIBYUV_API
|
||||
int I210ToARGBMatrix(const uint16_t* src_y,
|
||||
|
||||
@ -282,6 +282,8 @@ extern "C" {
|
||||
#define HAS_HALFMERGEUVROW_SSSE3
|
||||
#define HAS_I210TOAR30ROW_SSSE3
|
||||
#define HAS_I210TOARGBROW_SSSE3
|
||||
#define HAS_I212TOAR30ROW_SSSE3
|
||||
#define HAS_I212TOARGBROW_SSSE3
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I422TOAR30ROW_SSSE3
|
||||
#define HAS_I410TOAR30ROW_SSSE3
|
||||
@ -320,6 +322,8 @@ extern "C" {
|
||||
#define HAS_MERGEARGBROW_AVX2
|
||||
#define HAS_I210TOAR30ROW_AVX2
|
||||
#define HAS_I210TOARGBROW_AVX2
|
||||
#define HAS_I212TOAR30ROW_AVX2
|
||||
#define HAS_I212TOARGBROW_AVX2
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#define HAS_I410TOAR30ROW_AVX2
|
||||
#define HAS_I410TOARGBROW_AVX2
|
||||
@ -721,9 +725,15 @@ struct YuvConstants {
|
||||
#else
|
||||
// This struct is for Intel color conversion.
|
||||
struct YuvConstants {
|
||||
#if LIBYUV_UNLIMITED_DATA
|
||||
uint8_t kUVToB[32];
|
||||
uint8_t kUVToG[32];
|
||||
uint8_t kUVToR[32];
|
||||
#else
|
||||
int8_t kUVToB[32];
|
||||
int8_t kUVToG[32];
|
||||
int8_t kUVToR[32];
|
||||
#endif
|
||||
int16_t kUVBiasB[16];
|
||||
int16_t kUVBiasG[16];
|
||||
int16_t kUVBiasR[16];
|
||||
@ -2591,6 +2601,18 @@ void I210ToARGBRow_C(const uint16_t* src_y,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToAR30Row_C(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToARGBRow_C(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I410ToAR30Row_C(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
@ -2617,7 +2639,6 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I444AlphaToARGBRow_C(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
@ -2769,6 +2790,18 @@ void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I410ToAR30Row_SSSE3(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
@ -2813,6 +2846,18 @@ void I210ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I410ToAR30Row_AVX2(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
@ -3081,6 +3126,18 @@ void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I410ToAR30Row_Any_SSSE3(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
@ -3125,6 +3182,18 @@ void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToARGBRow_Any_AVX2(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I212ToAR30Row_Any_AVX2(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I410ToAR30Row_Any_AVX2(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
|
||||
@ -888,6 +888,63 @@ int U010ToAB30(const uint16_t* src_y,
|
||||
&kYuv2020Constants, width, height);
|
||||
}
|
||||
|
||||
// Convert 12 bit YUV to ARGB with matrix.
|
||||
// TODO(fbarchard): Consider passing scale multiplier to I212ToARGB to
|
||||
// multiply 12 bit yuv into high bits to allow any number of bits.
|
||||
LIBYUV_API
|
||||
int I012ToAR30Matrix(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I212ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
|
||||
const uint16_t* v_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I212ToAR30Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
|
||||
dst_stride_ar30 = -dst_stride_ar30;
|
||||
}
|
||||
#if defined(HAS_I212TOAR30ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I212ToAR30Row = I212ToAR30Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I212ToAR30Row = I212ToAR30Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I212TOAR30ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I212ToAR30Row = I212ToAR30Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I212ToAR30Row = I212ToAR30Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
I212ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
|
||||
dst_ar30 += dst_stride_ar30;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert 10 bit YUV to ARGB with matrix.
|
||||
// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
|
||||
// multiply 10 bit yuv into high bits to allow any number of bits.
|
||||
@ -1260,6 +1317,61 @@ int U010ToABGR(const uint16_t* src_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert 12 bit YUV to ARGB with matrix.
|
||||
LIBYUV_API
|
||||
int I012ToARGBMatrix(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I212ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
|
||||
const uint16_t* v_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I212ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_I212TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I212ToARGBRow = I212ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I212ToARGBRow = I212ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I212TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I212ToARGBRow = I212ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I212ToARGBRow = I212ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
I212ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert 10 bit 422 YUV to ARGB with matrix.
|
||||
LIBYUV_API
|
||||
int I210ToARGBMatrix(const uint16_t* src_y,
|
||||
@ -1484,9 +1596,9 @@ int P010ToARGBMatrix(const uint16_t* src_y,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*P210ToARGBRow)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
P210ToARGBRow_C;
|
||||
void (*P210ToARGBRow)(
|
||||
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -1534,9 +1646,9 @@ int P210ToARGBMatrix(const uint16_t* src_y,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*P210ToARGBRow)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
P210ToARGBRow_C;
|
||||
void (*P210ToARGBRow)(
|
||||
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -1582,9 +1694,9 @@ int P010ToAR30Matrix(const uint16_t* src_y,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*P210ToAR30Row)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
P210ToAR30Row_C;
|
||||
void (*P210ToAR30Row)(
|
||||
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
|
||||
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -1632,9 +1744,9 @@ int P210ToAR30Matrix(const uint16_t* src_y,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*P210ToAR30Row)(const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
P210ToAR30Row_C;
|
||||
void (*P210ToAR30Row)(
|
||||
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
|
||||
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -138,19 +138,47 @@ ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
|
||||
}
|
||||
|
||||
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
|
||||
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
|
||||
ANY41CT(I210AlphaToARGBRow_Any_SSSE3,
|
||||
I210AlphaToARGBRow_SSSE3,
|
||||
1,
|
||||
0,
|
||||
uint16_t,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_I210ALPHATOARGBROW_AVX2
|
||||
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
|
||||
ANY41CT(I210AlphaToARGBRow_Any_AVX2,
|
||||
I210AlphaToARGBRow_AVX2,
|
||||
1,
|
||||
0,
|
||||
uint16_t,
|
||||
2,
|
||||
4,
|
||||
15)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
|
||||
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
|
||||
ANY41CT(I410AlphaToARGBRow_Any_SSSE3,
|
||||
I410AlphaToARGBRow_SSSE3,
|
||||
0,
|
||||
0,
|
||||
uint16_t,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_I410ALPHATOARGBROW_AVX2
|
||||
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
|
||||
ANY41CT(I410AlphaToARGBRow_Any_AVX2,
|
||||
I410AlphaToARGBRow_AVX2,
|
||||
0,
|
||||
0,
|
||||
uint16_t,
|
||||
2,
|
||||
4,
|
||||
15)
|
||||
#endif
|
||||
|
||||
#undef ANY41CT
|
||||
@ -382,6 +410,18 @@ ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
|
||||
#ifdef HAS_I210TOARGBROW_MMI
|
||||
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I212TOAR30ROW_SSSE3
|
||||
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I212TOARGBROW_SSSE3
|
||||
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I212TOARGBROW_AVX2
|
||||
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I212TOAR30ROW_AVX2
|
||||
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
|
||||
#endif
|
||||
#undef ANY31CT
|
||||
|
||||
// Any 2 planes to 1.
|
||||
|
||||
@ -21,6 +21,11 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// These 2 macros control YUV to RGB using unsigned math to extend range.
|
||||
// They can be used separately to enable new code and old data (clamped)
|
||||
// LIBYUV_UNLIMITED_DATA
|
||||
// LIBYUV_UNLIMITED_CODE
|
||||
|
||||
// The following ifdef from row_win makes the C code match the row_win code,
|
||||
// which is 7 bit fixed point.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
|
||||
@ -1395,7 +1400,11 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
// KR = 0.299; KB = 0.114
|
||||
|
||||
// U and V contributions to R,G,B.
|
||||
#if LIBYUV_UNLIMITED_DATA
|
||||
#define UB 129 /* round(2.018 * 64) */
|
||||
#else
|
||||
#define UB 128 /* max(128, round(2.018 * 64)) */
|
||||
#endif
|
||||
#define UG 25 /* round(0.391 * 64) */
|
||||
#define VG 52 /* round(0.813 * 64) */
|
||||
#define VR 102 /* round(1.596 * 64) */
|
||||
@ -1444,9 +1453,12 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
|
||||
// B = (Y - 16) * 1.164 + U * 2.112
|
||||
// KR = 0.2126, KB = 0.0722
|
||||
|
||||
// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
|
||||
// U and V contributions to R,G,B.
|
||||
#if LIBYUV_UNLIMITED_DATA
|
||||
#define UB 135 /* round(2.112 * 64) */
|
||||
#else
|
||||
#define UB 128 /* max(128, round(2.112 * 64)) */
|
||||
#endif
|
||||
#define UG 14 /* round(0.213 * 64) */
|
||||
#define VG 34 /* round(0.533 * 64) */
|
||||
#define VR 115 /* round(1.793 * 64) */
|
||||
@ -1495,9 +1507,12 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
|
||||
// B = (Y - 16) * 1.164384 + U * 2.14177
|
||||
// KR = 0.2627; KB = 0.0593
|
||||
|
||||
// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
|
||||
// U and V contributions to R,G,B.
|
||||
#if LIBYUV_UNLIMITED_DATA
|
||||
#define UB 137 /* round(2.142 * 64) */
|
||||
#else
|
||||
#define UB 128 /* max(128, round(2.142 * 64)) */
|
||||
#endif
|
||||
#define UG 12 /* round(0.187326 * 64) */
|
||||
#define VG 42 /* round(0.65042 * 64) */
|
||||
#define VR 107 /* round(1.67867 * 64) */
|
||||
@ -1545,9 +1560,55 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
|
||||
|
||||
#undef MAKEYUVCONSTANTS
|
||||
|
||||
#if LIBYUV_UNLIMITED_DATA
|
||||
|
||||
// C reference code that mimics the YUV assembly.
|
||||
// Reads 8 bit YUV and leaves result as 16 bit.
|
||||
static __inline void YuvPixel(uint8_t y,
|
||||
uint8_t u,
|
||||
uint8_t v,
|
||||
uint8_t* b,
|
||||
uint8_t* g,
|
||||
uint8_t* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
#if defined(__aarch64__)
|
||||
int ub = yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = yuvconstants->kUVToRB[1];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#elif defined(__arm__)
|
||||
int ub = yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[4];
|
||||
int vr = yuvconstants->kUVToRB[4];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#else
|
||||
int ub = -yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = -yuvconstants->kUVToR[1];
|
||||
int bb = yuvconstants->kUVBiasB[0];
|
||||
int bg = yuvconstants->kUVBiasG[0];
|
||||
int br = yuvconstants->kUVBiasR[0];
|
||||
int yg = yuvconstants->kYToRgb[0];
|
||||
#endif
|
||||
|
||||
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
|
||||
*b = Clamp((int32_t)(y1 + (u * ub) + bb) >> 6);
|
||||
*g = Clamp((int32_t)(y1 - (u * ug + v * vg) + bg) >> 6);
|
||||
*r = Clamp((int32_t)(y1 + (v * vr) + br) >> 6);
|
||||
}
|
||||
#else
|
||||
// C reference code that mimics the YUV assembly.
|
||||
// Reads 8 bit YUV and leaves result as 8 bit.
|
||||
static __inline void YuvPixel8_8(uint8_t y,
|
||||
static __inline void YuvPixel(uint8_t y,
|
||||
uint8_t u,
|
||||
uint8_t v,
|
||||
uint8_t* b,
|
||||
@ -1584,10 +1645,11 @@ static __inline void YuvPixel8_8(uint8_t y,
|
||||
#endif
|
||||
|
||||
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
|
||||
*b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6);
|
||||
*g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6);
|
||||
*r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6);
|
||||
*b = Clamp((int32_t)(y1 - (u * ub) + bb) >> 6);
|
||||
*g = Clamp((int32_t)(y1 - (u * ug + v * vg) + bg) >> 6);
|
||||
*r = Clamp((int32_t)(y1 - (v * vr) + br) >> 6);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reads 8 bit YUV and leaves result as 16 bit.
|
||||
static __inline void YuvPixel8_16(uint8_t y,
|
||||
@ -1627,9 +1689,9 @@ static __inline void YuvPixel8_16(uint8_t y,
|
||||
#endif
|
||||
|
||||
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
|
||||
*b = (int)(-(u * ub) + y1 + bb);
|
||||
*g = (int)(-(u * ug + v * vg) + y1 + bg);
|
||||
*r = (int)(-(v * vr) + y1 + br);
|
||||
*b = (int)(y1 - (u * ub) + bb);
|
||||
*g = (int)(y1 - (u * ug + v * vg) + bg);
|
||||
*r = (int)(y1 - (v * vr) + br);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV 16 bit assembly.
|
||||
@ -1678,9 +1740,55 @@ static __inline void YuvPixel10_16(uint16_t y,
|
||||
*r = (int)(-(v * vr) + y1 + br);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV 16 bit assembly.
|
||||
// Reads 12 bit YUV and leaves result as 16 bit.
|
||||
static __inline void YuvPixel12_16(int16_t y,
|
||||
int16_t u,
|
||||
int16_t v,
|
||||
int* b,
|
||||
int* g,
|
||||
int* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
#if defined(__aarch64__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = -yuvconstants->kUVToRB[1];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#elif defined(__arm__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[4];
|
||||
int vr = -yuvconstants->kUVToRB[4];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#else
|
||||
int ub = yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = yuvconstants->kUVToR[1];
|
||||
int bb = yuvconstants->kUVBiasB[0];
|
||||
int bg = yuvconstants->kUVBiasG[0];
|
||||
int br = yuvconstants->kUVBiasR[0];
|
||||
int yg = yuvconstants->kYToRgb[0];
|
||||
#endif
|
||||
|
||||
uint32_t y1 = (uint32_t)((y << 4) * yg) >> 16;
|
||||
u = clamp255(u >> 4);
|
||||
v = clamp255(v >> 4);
|
||||
*b = (int)(-(u * ub) + y1 + bb);
|
||||
*g = (int)(-(u * ug + v * vg) + y1 + bg);
|
||||
*r = (int)(-(v * vr) + y1 + br);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV 10 bit assembly.
|
||||
// Reads 10 bit YUV and clamps down to 8 bit RGB.
|
||||
static __inline void YuvPixel10_8(uint16_t y,
|
||||
static __inline void YuvPixel10(uint16_t y,
|
||||
uint16_t u,
|
||||
uint16_t v,
|
||||
uint8_t* b,
|
||||
@ -1696,6 +1804,24 @@ static __inline void YuvPixel10_8(uint16_t y,
|
||||
*r = Clamp(r16 >> 6);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV 12 bit assembly.
// Reads 12 bit YUV and clamps down to 8 bit RGB.
// The conversion itself is done by YuvPixel12_16; this wrapper drops the
// 6 low bits of each channel and clamps the result.
static __inline void YuvPixel12(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int blue_wide;
  int green_wide;
  int red_wide;
  YuvPixel12_16(y, u, v, &blue_wide, &green_wide, &red_wide, yuvconstants);
  *b = Clamp(blue_wide >> 6);
  *g = Clamp(green_wide >> 6);
  *r = Clamp(red_wide >> 6);
}
|
||||
|
||||
// C reference code that mimics the YUV 16 bit assembly.
|
||||
// Reads 16 bit YUV and leaves result as 8 bit.
|
||||
static __inline void YuvPixel16_8(uint16_t y,
|
||||
@ -1783,9 +1909,9 @@ static __inline void YuvPixel16_16(uint16_t y,
|
||||
uint32_t y1 = (uint32_t)(y * yg) >> 16;
|
||||
u = clamp255(u >> 8);
|
||||
v = clamp255(v >> 8);
|
||||
*b = (int)(-(u * ub) + y1 + bb);
|
||||
*g = (int)(-(u * ug + v * vg) + y1 + bg);
|
||||
*r = (int)(-(v * vr) + y1 + br);
|
||||
*b = (int)(y1 + -(u * ub) + bb);
|
||||
*g = (int)(y1 + -(u * ug + v * vg) + bg);
|
||||
*r = (int)(y1 + -(v * vr) + br);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV assembly.
|
||||
@ -1822,10 +1948,10 @@ void I444ToARGBRow_C(const uint8_t* src_y,
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
|
||||
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
|
||||
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
|
||||
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
|
||||
yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
|
||||
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
|
||||
yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
@ -1834,7 +1960,7 @@ void I444ToARGBRow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -1848,7 +1974,7 @@ void I444ToARGBRow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
src_y += 1;
|
||||
@ -1868,10 +1994,10 @@ void I422ToARGBRow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
@ -1880,7 +2006,7 @@ void I422ToARGBRow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -1895,10 +2021,10 @@ void I210ToARGBRow_C(const uint16_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
@ -1907,7 +2033,7 @@ void I210ToARGBRow_C(const uint16_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -1921,7 +2047,7 @@ void I410ToARGBRow_C(const uint16_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
src_y += 1;
|
||||
@ -1940,10 +2066,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = clamp255(src_a[0] >> 2);
|
||||
YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = clamp255(src_a[1] >> 2);
|
||||
src_y += 2;
|
||||
@ -1953,7 +2079,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = clamp255(src_a[0] >> 2);
|
||||
}
|
||||
@ -1968,7 +2094,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = clamp255(src_a[0] >> 2);
|
||||
src_y += 1;
|
||||
@ -1979,6 +2105,33 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// 12 bit YUV to ARGB
// Converts one row. Each U/V sample is shared by two horizontally adjacent
// Y samples. Every output pixel is 4 bytes: B, G, R, then alpha set to 255.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  // Full pairs of pixels.
  while (remaining > 1) {
    const uint16_t chroma_u = src_u[0];
    const uint16_t chroma_v = src_v[0];
    YuvPixel12(src_y[0], chroma_u, chroma_v, rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel12(src_y[1], chroma_u, chroma_v, rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Trailing pixel of an odd width.
  if (width & 1) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
|
||||
|
||||
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
|
||||
uint32_t ar30;
|
||||
b = b >> 4; // convert 8 bit 10.6 to 10 bit.
|
||||
@ -2018,6 +2171,33 @@ void I210ToAR30Row_C(const uint16_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// 12 bit YUV to 10 bit AR30.
// Converts one row where every horizontal pair of Y samples shares a single
// U and a single V sample. YuvPixel12_16 produces the three channel values
// and StoreAR30 writes them as one 4 byte pixel.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int blue;
  int green;
  int red;
  int col = 0;
  // Whole pairs: both luma samples are combined with the same chroma sample.
  while (col < width - 1) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf + 4, blue, green, red);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
    col += 2;
  }
  // Odd width: convert the single remaining pixel.
  if ((width & 1) != 0) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
  }
}
|
||||
|
||||
void I410ToAR30Row_C(const uint16_t* src_y,
|
||||
const uint16_t* src_u,
|
||||
const uint16_t* src_v,
|
||||
@ -2038,6 +2218,7 @@ void I410ToAR30Row_C(const uint16_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// P210 has 10 bits in msb of 16 bit NV12 style layout.
|
||||
void P210ToARGBRow_C(const uint16_t* src_y,
|
||||
const uint16_t* src_uv,
|
||||
uint8_t* rgb_buf,
|
||||
@ -2163,10 +2344,10 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
|
||||
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
|
||||
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
|
||||
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
|
||||
yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
|
||||
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
|
||||
yuvconstants);
|
||||
rgb_buf[7] = src_a[1];
|
||||
src_y += 2;
|
||||
@ -2176,7 +2357,7 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
}
|
||||
@ -2191,7 +2372,7 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
src_y += 1;
|
||||
@ -2212,10 +2393,10 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = src_a[1];
|
||||
src_y += 2;
|
||||
@ -2225,7 +2406,7 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = src_a[0];
|
||||
}
|
||||
@ -2239,9 +2420,9 @@ void I422ToRGB24Row_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
|
||||
rgb_buf + 5, yuvconstants);
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
@ -2249,7 +2430,7 @@ void I422ToRGB24Row_C(const uint8_t* src_y,
|
||||
rgb_buf += 6; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
}
|
||||
}
|
||||
@ -2268,8 +2449,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
|
||||
uint8_t r1;
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
|
||||
b0 = b0 >> 4;
|
||||
g0 = g0 >> 4;
|
||||
r0 = r0 >> 4;
|
||||
@ -2284,7 +2465,7 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
|
||||
dst_argb4444 += 4; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
b0 = b0 >> 4;
|
||||
g0 = g0 >> 4;
|
||||
r0 = r0 >> 4;
|
||||
@ -2306,8 +2487,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
|
||||
uint8_t r1;
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
|
||||
b0 = b0 >> 3;
|
||||
g0 = g0 >> 3;
|
||||
r0 = r0 >> 3;
|
||||
@ -2322,7 +2503,7 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
|
||||
dst_argb1555 += 4; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
b0 = b0 >> 3;
|
||||
g0 = g0 >> 3;
|
||||
r0 = r0 >> 3;
|
||||
@ -2344,8 +2525,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
|
||||
uint8_t r1;
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
|
||||
b0 = b0 >> 3;
|
||||
g0 = g0 >> 2;
|
||||
r0 = r0 >> 3;
|
||||
@ -2360,7 +2541,7 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
|
||||
dst_rgb565 += 4; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
|
||||
b0 = b0 >> 3;
|
||||
g0 = g0 >> 2;
|
||||
r0 = r0 >> 3;
|
||||
@ -2375,10 +2556,10 @@ void NV12ToARGBRow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
@ -2386,7 +2567,7 @@ void NV12ToARGBRow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -2399,10 +2580,10 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
@ -2410,7 +2591,7 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -2423,16 +2604,16 @@ void NV12ToRGB24Row_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
|
||||
YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
|
||||
rgb_buf + 5, yuvconstants);
|
||||
src_y += 2;
|
||||
src_uv += 2;
|
||||
rgb_buf += 6; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
}
|
||||
}
|
||||
@ -2444,16 +2625,16 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
|
||||
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
|
||||
rgb_buf + 5, yuvconstants);
|
||||
src_y += 2;
|
||||
src_vu += 2;
|
||||
rgb_buf += 6; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
}
|
||||
}
|
||||
@ -2471,8 +2652,8 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
|
||||
uint8_t r1;
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
|
||||
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
|
||||
b0 = b0 >> 3;
|
||||
g0 = g0 >> 2;
|
||||
r0 = r0 >> 3;
|
||||
@ -2486,7 +2667,7 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
|
||||
dst_rgb565 += 4; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
|
||||
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
|
||||
b0 = b0 >> 3;
|
||||
g0 = g0 >> 2;
|
||||
r0 = r0 >> 3;
|
||||
@ -2500,17 +2681,17 @@ void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel8_8(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_yuy2 += 4;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -2522,17 +2703,17 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel8_8(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
|
||||
YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
|
||||
rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_uyvy += 4;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
|
||||
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
|
||||
rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
@ -2546,10 +2727,10 @@ void I422ToRGBARow_C(const uint8_t* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
|
||||
rgb_buf + 3, yuvconstants);
|
||||
rgb_buf[0] = 255;
|
||||
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
|
||||
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
|
||||
rgb_buf + 7, yuvconstants);
|
||||
rgb_buf[4] = 255;
|
||||
src_y += 2;
|
||||
@ -2558,7 +2739,7 @@ void I422ToRGBARow_C(const uint8_t* src_y,
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
|
||||
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
|
||||
rgb_buf + 3, yuvconstants);
|
||||
rgb_buf[0] = 255;
|
||||
}
|
||||
|
||||
@ -2001,6 +2001,19 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
|
||||
"packuswb %%xmm5,%%xmm5 \n" \
|
||||
"lea 0x10(%[a_buf]),%[a_buf] \n"
|
||||
|
||||
// Read 4 UV from 422 12 bit, upsample to 8 UV
// Steps, in order:
// - movq loads 8 bytes (4 x 16 bit) from u_buf and 8 bytes from
//   u_buf + v_buf. The I212 SSSE3 row functions that use this macro execute
//   "sub %[u_buf],%[v_buf]" first, so v_buf holds the V-minus-U offset.
// - u_buf advances by 8 bytes.
// - punpcklwd interleaves the U and V words; psraw $4 shifts each word right
//   by 4 (12 bit -> 8 bit); packuswb packs the words to bytes.
// - punpcklwd of xmm0 with itself repeats every UV byte pair, giving 8 UV.
// - movdqu loads 16 bytes (8 x 16 bit Y) into xmm4, psllw $4 shifts each Y
//   left by 4, and y_buf advances by 16 bytes.
|
||||
#define READYUV212 \
|
||||
"movq (%[u_buf]),%%xmm0 \n" \
|
||||
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
|
||||
"lea 0x8(%[u_buf]),%[u_buf] \n" \
|
||||
"punpcklwd %%xmm1,%%xmm0 \n" \
|
||||
"psraw $0x4,%%xmm0 \n" \
|
||||
"packuswb %%xmm0,%%xmm0 \n" \
|
||||
"punpcklwd %%xmm0,%%xmm0 \n" \
|
||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||
"psllw $0x4,%%xmm4 \n" \
|
||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
|
||||
#define READYUVA422 \
|
||||
"movd (%[u_buf]),%%xmm0 \n" \
|
||||
@ -2398,6 +2411,36 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// 12 bit YUV to ARGB
// Each loop iteration runs READYUV212, YUVTORGB and STOREARGB, then subtracts
// 8 from width and repeats while the result is positive.
// NOTE(review): presumably width is a multiple of 8 or remainders are handled
// by a wrapper - confirm against callers.
|
||||
void OMITFP I212ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n" // v_buf becomes the offset from u_buf
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones (presumably alpha for STOREARGB - confirm)
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV212
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n" // 8 pixels per iteration
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
// 10 bit YUV to AR30
|
||||
void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
@ -2433,6 +2476,41 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// 12 bit YUV to AR30
// Each loop iteration runs READYUV212, YUVTORGB16 and STOREAR30, then
// subtracts 8 from width and repeats while the result is positive.
// Before the loop, xmm5, xmm6 and xmm7 are loaded with constants
// (presumably consumed by STOREAR30 - confirm against that macro).
|
||||
void OMITFP I212ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n" // v_buf becomes the offset from u_buf
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
|
||||
"psrlw $14,%%xmm5 \n"
|
||||
"psllw $4,%%xmm5 \n" // 2 alpha bits
|
||||
"pxor %%xmm6,%%xmm6 \n" // 0 for min
|
||||
"pcmpeqb %%xmm7,%%xmm7 \n" // all ones, shifted next
|
||||
"psrlw $6,%%xmm7 \n" // 1023 for max
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV212
|
||||
YUVTORGB16(yuvconstants)
|
||||
STOREAR30
|
||||
"sub $0x8,%[width] \n" // 8 pixels per iteration
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
|
||||
// 10 bit YUV to ARGB
|
||||
void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
@ -2474,14 +2552,11 @@ void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
YUVTORGB_SETUP(
|
||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUVA210
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
LABELALIGN "1: \n" READYUVA210
|
||||
YUVTORGB(yuvconstants) STOREARGB
|
||||
"subl $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf] "+r"(y_buf), // %[y_buf]
|
||||
@ -2495,8 +2570,8 @@ void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
[width] "+rm"(width) // %[width]
|
||||
#endif
|
||||
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
|
||||
"xmm5");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2511,14 +2586,11 @@ void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
YUVTORGB_SETUP(
|
||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUVA410
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
LABELALIGN "1: \n" READYUVA410
|
||||
YUVTORGB(yuvconstants) STOREARGB
|
||||
"subl $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf] "+r"(y_buf), // %[y_buf]
|
||||
@ -2532,8 +2604,8 @@ void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
[width] "+rm"(width) // %[width]
|
||||
#endif
|
||||
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
|
||||
"xmm5");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2731,14 +2803,11 @@ void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
YUVTORGB_SETUP(
|
||||
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READP210
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
LABELALIGN "1: \n" READP210
|
||||
YUVTORGB(yuvconstants) STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf] "+r"(y_buf), // %[y_buf]
|
||||
@ -2746,9 +2815,8 @@ void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
[dst_argb] "+r"(dst_argb), // %[dst_argb]
|
||||
[width] "+rm"(width) // %[width]
|
||||
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
|
||||
"xmm5");
|
||||
}
|
||||
|
||||
void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
@ -2758,14 +2826,11 @@ void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
YUVTORGB_SETUP(
|
||||
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READP410
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
LABELALIGN "1: \n" READP410
|
||||
YUVTORGB(yuvconstants) STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf] "+r"(y_buf), // %[y_buf]
|
||||
@ -2948,6 +3013,21 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
"vpsllw $6,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 8 UV from 212 12 bit, upsample to 16 UV
// Steps, in order:
// - vmovdqu loads 16 bytes (8 x 16 bit) from u_buf into xmm0 and 16 bytes
//   from u_buf + v_buf into xmm1. The I212 AVX2 row functions that use this
//   macro execute "sub %[u_buf],%[v_buf]" first, so v_buf holds the
//   V-minus-U offset.
// - u_buf advances by 16 bytes.
// - vpermq $0xd8 reorders the 64 bit quarters of ymm0 and ymm1 ahead of the
//   unpack (presumably so each 128 bit lane gets half of the loaded values -
//   confirm).
// - vpunpcklwd interleaves U and V words; vpsraw $4 shifts each word right
//   by 4 (12 bit -> 8 bit); vpackuswb packs the words to bytes.
// - vpunpcklwd of ymm0 with itself repeats every UV byte pair, giving 16 UV.
// - vmovdqu loads 32 bytes (16 x 16 bit Y) into ymm4, vpsllw $4 shifts each
//   Y left by 4, and y_buf advances by 32 bytes.
|
||||
#define READYUV212_AVX2 \
|
||||
"vmovdqu (%[u_buf]),%%xmm0 \n" \
|
||||
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
|
||||
"lea 0x10(%[u_buf]),%[u_buf] \n" \
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
|
||||
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
|
||||
"vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
|
||||
"vpsraw $0x4,%%ymm0,%%ymm0 \n" \
|
||||
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
|
||||
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
|
||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||
"vpsllw $0x4,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 16 UV from 410. With 16 Alpha.
|
||||
#define READYUVA410_AVX2 \
|
||||
"vmovdqu (%[u_buf]),%%ymm0 \n" \
|
||||
@ -3295,6 +3375,41 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
}
|
||||
#endif // HAS_I210TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I212TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
// Each loop iteration runs READYUV212_AVX2, YUVTORGB_AVX2 and STOREARGB_AVX2,
// then subtracts 16 from width and repeats while the result is positive.
// NOTE(review): presumably width is a multiple of 16 or remainders are handled
// by a wrapper - confirm against callers.
|
||||
void OMITFP I212ToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
const uint16_t* u_buf,
|
||||
const uint16_t* v_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n" // v_buf becomes the offset from u_buf
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // ymm5 = all ones (presumably alpha for STOREARGB_AVX2 - confirm)
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV212_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREARGB_AVX2
|
||||
"sub $0x10,%[width] \n" // 16 pixels per iteration
|
||||
"jg 1b \n"
|
||||
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS_AVX2
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I212TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I210TOAR30ROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
|
||||
@ -3335,6 +3450,46 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
}
|
||||
#endif // HAS_I210TOAR30ROW_AVX2
|
||||
|
||||
#if defined(HAS_I212TOAR30ROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
// Each loop iteration runs READYUV212_AVX2, YUVTORGB16_AVX2 and STOREAR30_AVX2,
// then subtracts 16 from width and repeats while the result is positive.
// NOTE(review): presumably width is a multiple of 16 or remainders are handled
// by a wrapper - confirm against callers.
// ymm5, ymm6 and ymm7 are loaded with constants before the loop, so xmm6 and
// xmm7 are declared as clobbered alongside xmm0-xmm5 (as the SSSE3 version of
// this function does).
void OMITFP I212ToAR30Row_AVX2(const uint16_t* y_buf,
                               const uint16_t* u_buf,
                               const uint16_t* v_buf,
                               uint8_t* dst_ar30,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
    "sub         %[u_buf],%[v_buf]             \n"  // v_buf = offset from u_buf
    "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"  // AR30 constants
    "vpsrlw      $14,%%ymm5,%%ymm5             \n"
    "vpsllw      $4,%%ymm5,%%ymm5              \n"  // 2 alpha bits
    "vpxor       %%ymm6,%%ymm6,%%ymm6          \n"  // 0 for min
    "vpcmpeqb    %%ymm7,%%ymm7,%%ymm7          \n"  // 1023 for max
    "vpsrlw      $6,%%ymm7,%%ymm7              \n"

    LABELALIGN
    "1:                                        \n"
    READYUV212_AVX2
    YUVTORGB16_AVX2(yuvconstants)
    STOREAR30_AVX2
    "sub         $0x10,%[width]                \n"  // 16 pixels per iteration
    "jg          1b                            \n"

    "vzeroupper                                \n"
  : [y_buf]"+r"(y_buf),    // %[y_buf]
    [u_buf]"+r"(u_buf),    // %[u_buf]
    [v_buf]"+r"(v_buf),    // %[v_buf]
    [dst_ar30]"+r"(dst_ar30),  // %[dst_ar30]
    [width]"+rm"(width)    // %[width]
  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
#endif  // HAS_I212TOAR30ROW_AVX2
|
||||
|
||||
#if defined(HAS_I410TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
|
||||
@ -3383,14 +3538,11 @@ void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
YUVTORGB_SETUP_AVX2(
|
||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUVA210_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREARGB_AVX2
|
||||
LABELALIGN "1: \n" READYUVA210_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2
|
||||
"subl $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -3407,8 +3559,7 @@ void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
#endif
|
||||
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
|
||||
"xmm4", "xmm5"
|
||||
);
|
||||
"xmm4", "xmm5");
|
||||
}
|
||||
#endif // HAS_I210TOARGBROW_AVX2
|
||||
|
||||
@ -3424,14 +3575,11 @@ void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
YUVTORGB_SETUP_AVX2(
|
||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUVA410_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREARGB_AVX2
|
||||
LABELALIGN "1: \n" READYUVA410_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2
|
||||
"subl $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -3448,8 +3596,7 @@ void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
#endif
|
||||
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
|
||||
"xmm4", "xmm5"
|
||||
);
|
||||
"xmm4", "xmm5");
|
||||
}
|
||||
#endif // HAS_I410TOARGBROW_AVX2
|
||||
|
||||
|
||||
@ -3167,9 +3167,40 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
|
||||
}
|
||||
#endif // HAS_ABGRTOAR30ROW_AVX2
|
||||
|
||||
// Provide matrix wrappers for 12 bit YUV
// Each wrapper forwards its 10 arguments to the matching *Matrix function and
// inserts a fixed YuvConstants pointer before the last two arguments.
|
||||
#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
|
||||
// Matrix wrappers for I410. The leading letter selects the constants:
// I = kYuvI601Constants, H = kYuvH709Constants, U = kYuv2020Constants.
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
|
||||
// TODO(fbarchard): Fix clamping issue affected by U channel.
|
||||
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
|
||||
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
|
||||
BPP_B, ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||
@ -3183,14 +3214,11 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
|
||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
|
||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
|
||||
(fastrand() & ((1 << S_DEPTH) - 1)); \
|
||||
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
|
||||
} \
|
||||
for (int i = 0; i < kSizeUV; ++i) { \
|
||||
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = \
|
||||
(fastrand() & ((1 << S_DEPTH) - 1)); \
|
||||
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = \
|
||||
(fastrand() & ((1 << S_DEPTH) - 1)); \
|
||||
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & FMT_MASK); \
|
||||
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & FMT_MASK); \
|
||||
} \
|
||||
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
|
||||
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
|
||||
@ -3218,79 +3246,57 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
}
|
||||
|
||||
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, S_DEPTH) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, _Unaligned, +, 1, 1, S_DEPTH) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
|
||||
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
|
||||
BPP_B, ALIGN, YALIGN) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, benchmark_width_ - 4, _Any, +, 0, 0) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, benchmark_width_, _Unaligned, +, 1, 1) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
|
||||
|
||||
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
|
||||
#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
|
||||
#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
|
||||
I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
|
||||
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U010, 2, 2, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U210, 2, 1, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ARGB, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, ABGR, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I012, 2, 2, 0xfff, ARGB, 4, 4, 1)
|
||||
|
||||
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U010, 2, 2, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U010, 2, 2, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I210, 2, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I210, 2, 1, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I410, 1, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I410, 1, 1, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H410, 1, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H410, 1, 1, ABGR, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U410, 1, 1, ARGB, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U410, 1, 1, ABGR, 4, 4, 1, 10)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U010, 2, 2, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U010, 2, 2, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I210, 2, 1, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I210, 2, 1, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H210, 2, 1, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H210, 2, 1, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U210, 2, 1, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U210, 2, 1, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I410, 1, 1, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I410, 1, 1, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H410, 1, 1, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(H410, 1, 1, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U410, 1, 1, AR30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(U410, 1, 1, AB30, 4, 4, 1, 10)
|
||||
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H010, 2, 2, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U010, 2, 2, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H210, 2, 1, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U210, 2, 1, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I410, 1, 1, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
|
||||
TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
|
||||
#endif
|
||||
|
||||
#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
|
||||
@ -26,9 +26,13 @@ unsigned int fastrand_seed = 0xfb;
|
||||
ABSL_FLAG(int32_t, libyuv_width, 0, "width of test image.");
|
||||
ABSL_FLAG(int32_t, libyuv_height, 0, "height of test image.");
|
||||
ABSL_FLAG(int32_t, libyuv_repeat, 0, "number of times to repeat test.");
|
||||
ABSL_FLAG(int32_t, libyuv_flags, 0,
|
||||
ABSL_FLAG(int32_t,
|
||||
libyuv_flags,
|
||||
0,
|
||||
"cpu flags for reference code. 1 = C, -1 = SIMD");
|
||||
ABSL_FLAG(int32_t, libyuv_cpu_info, 0,
|
||||
ABSL_FLAG(int32_t,
|
||||
libyuv_cpu_info,
|
||||
0,
|
||||
"cpu flags for benchmark code. 1 = C, -1 = SIMD");
|
||||
#else
|
||||
// Disable command line parameters if absl/flags disabled.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user