Add more 10 bit YUV to RGB conversion functions

The following functions are added:
planar YUV:
 I410ToAR30, I410ToARGB
planar YUVA:
 I010AlphaToARGB, I210AlphaToARGB, I410AlphaToARGB
biplanar YUV:
 P010ToARGB, P210ToARGB
 P010ToAR30, P210ToAR30

biplanar functions can also handle 12 bit and 16 bit samples.

libyuv_unittest --gtest_filter=LibYUVConvertTest.*10*ToA*:LibYUVConvertTest.*P?1?ToA*

R=fbarchard@chromium.org

Bug: libyuv:751, libyuv:844
Change-Id: I2be02244dfa23335e1e7bc241fb0613990208de5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2707003
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Yuan Tong 2021-03-03 11:22:32 +08:00 committed by Frank Barchard
parent c41eabe3d4
commit cdabad5bfa
9 changed files with 2700 additions and 254 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1779
Version: 1780
License: BSD
License File: LICENSE

View File

@ -54,12 +54,30 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
// ABGR output is produced by the ARGB converters with the U plane/stride
// arguments (c, d) exchanged with the V plane/stride arguments (e, f), and a
// "VU" suffix token-pasted onto the constants name (i) to select a
// U/V-swapped matrix.
#define I010ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
// AB30 output: the same U/V swap applied to the AR30 converters.
#define I010ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
// Alpha-aware ABGR: U/V pairs (c, d) and (e, f) are swapped; the alpha plane
// arguments (g, h) and destination stay in place; the constants name is
// argument k here and gets the VU suffix.
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I422AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I444AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I010AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I210AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I410AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
// Alias.
#define ARGBToARGB ARGBCopy
@ -125,32 +143,6 @@ int J420ToABGR(const uint8_t* src_y,
int width,
int height);
// Convert F420 to ARGB. BT.709 full range
LIBYUV_API
int F420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert F420 to ABGR. BT.709 full range
LIBYUV_API
int F420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8_t* src_y,
@ -1440,7 +1432,7 @@ int I444ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
// multiply 10 bit yuv into high bits to allow any number of bits.
// Convert 10 bit 420 YUV to AR30 with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
@ -1454,7 +1446,7 @@ int I010ToAR30Matrix(const uint16_t* src_y,
int width,
int height);
// multiply 10 bit yuv into high bits to allow any number of bits.
// Convert 10 bit 422 YUV to AR30 with matrix.
LIBYUV_API
int I210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
@ -1468,6 +1460,20 @@ int I210ToAR30Matrix(const uint16_t* src_y,
int width,
int height);
// Convert 10 bit 444 YUV to AR30 with matrix.
LIBYUV_API
int I410ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit YUV to ARGB with matrix.
LIBYUV_API
int I010ToARGBMatrix(const uint16_t* src_y,
@ -1496,6 +1502,87 @@ int I210ToARGBMatrix(const uint16_t* src_y,
int width,
int height);
// Convert 10 bit 444 YUV to ARGB with matrix.
LIBYUV_API
int I410ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to ARGB with matrix.
LIBYUV_API
int P010ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to ARGB with matrix.
LIBYUV_API
int P210ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to AR30 with matrix.
LIBYUV_API
int P010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to AR30 with matrix.
LIBYUV_API
int P210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// P012 and P010 use most significant bits so the conversion is the same.
// Convert P012 to ARGB with matrix.
#define P012ToARGBMatrix P010ToARGBMatrix
// Convert P012 to AR30 with matrix.
#define P012ToAR30Matrix P010ToAR30Matrix
// Convert P212 to ARGB with matrix.
#define P212ToARGBMatrix P210ToARGBMatrix
// Convert P212 to AR30 with matrix.
#define P212ToAR30Matrix P210ToAR30Matrix
// Convert P016 to ARGB with matrix.
#define P016ToARGBMatrix P010ToARGBMatrix
// Convert P016 to AR30 with matrix.
#define P016ToAR30Matrix P010ToAR30Matrix
// Convert P216 to ARGB with matrix.
#define P216ToARGBMatrix P210ToARGBMatrix
// Convert P216 to AR30 with matrix.
#define P216ToAR30Matrix P210ToAR30Matrix
// Convert I420 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I420AlphaToARGBMatrix(const uint8_t* src_y,
@ -1547,6 +1634,57 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
int height,
int attenuate);
// Convert I010 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I010AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I210 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I210AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I410 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I410AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert NV12 to ARGB with matrix.
LIBYUV_API
int NV12ToARGBMatrix(const uint8_t* src_y,

View File

@ -175,8 +175,10 @@ extern "C" {
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I444ALPHATOARGBROW_SSSE3
#define HAS_I210ALPHATOARGBROW_SSSE3
#define HAS_I410ALPHATOARGBROW_SSSE3
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I444ALPHATOARGBROW_SSSE3
#endif
#endif
@ -240,8 +242,10 @@ extern "C" {
defined(_MSC_VER)
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I444ALPHATOARGBROW_AVX2
#define HAS_I210ALPHATOARGBROW_AVX2
#define HAS_I410ALPHATOARGBROW_AVX2
#define HAS_I422ALPHATOARGBROW_AVX2
#define HAS_I444ALPHATOARGBROW_AVX2
#endif
#endif
@ -280,9 +284,15 @@ extern "C" {
#define HAS_I210TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_I410TOAR30ROW_SSSE3
#define HAS_I410TOARGBROW_SSSE3
#define HAS_MERGEARGBROW_SSE2
#define HAS_MERGERGBROW_SSSE3
#define HAS_MIRRORUVROW_SSSE3
#define HAS_P210TOAR30ROW_SSSE3
#define HAS_P210TOARGBROW_SSSE3
#define HAS_P410TOAR30ROW_SSSE3
#define HAS_P410TOARGBROW_SSSE3
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
#define HAS_RGBATOYJROW_SSSE3
@ -311,6 +321,12 @@ extern "C" {
#define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2
#define HAS_I400TOARGBROW_AVX2
#define HAS_I410TOAR30ROW_AVX2
#define HAS_I410TOARGBROW_AVX2
#define HAS_P210TOAR30ROW_AVX2
#define HAS_P210TOARGBROW_AVX2
#define HAS_P410TOAR30ROW_AVX2
#define HAS_P410TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
@ -2575,6 +2591,33 @@ void I210ToARGBRow_C(const uint16_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_C(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -2626,6 +2669,27 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_C(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -2705,6 +2769,32 @@ void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I422ToAR30Row_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2723,6 +2813,32 @@ void I210ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2821,6 +2937,48 @@ void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2923,6 +3081,32 @@ void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_Any_SSSE3(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2941,6 +3125,32 @@ void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_Any_AVX2(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -3039,6 +3249,46 @@ void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_Any_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -3537,6 +3787,46 @@ void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P210ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P410ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void P210ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void P410ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1779
#define LIBYUV_VERSION 1780
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -1045,6 +1045,58 @@ int U210ToAB30(const uint16_t* src_y,
&kYuv2020Constants, width, height);
}
// Convert 10 bit 444 YUV (I410) to AR30 with the supplied color matrix.
// Returns 0 on success, -1 on invalid arguments. Negative height inverts.
LIBYUV_API
int I410ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_u,
                     int src_stride_u,
                     const uint16_t* src_v,
                     int src_stride_v,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Per-row converter: starts as the portable C path and is promoted to a
  // SIMD variant below when the CPU supports it.
  void (*I410ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
                        const uint16_t* v_buf, uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I410ToAR30Row_C;
  if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar30 += (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_I410TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I410ToAR30Row = IS_ALIGNED(width, 8) ? I410ToAR30Row_SSSE3
                                         : I410ToAR30Row_Any_SSSE3;
  }
#endif
#if defined(HAS_I410TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I410ToAR30Row = IS_ALIGNED(width, 16) ? I410ToAR30Row_AVX2
                                          : I410ToAR30Row_Any_AVX2;
  }
#endif
  // 4:4:4 sampling: every plane advances one stride per output row.
  for (row = 0; row < height; ++row) {
    I410ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_ar30 += dst_stride_ar30;
  }
  return 0;
}
// Convert 10 bit YUV to ARGB with matrix.
LIBYUV_API
int I010ToARGBMatrix(const uint16_t* src_y,
@ -1087,14 +1139,6 @@ int I010ToARGBMatrix(const uint16_t* src_y,
I210ToARGBRow = I210ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I210TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I210ToARGBRow = I210ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I210ToARGBRow = I210ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@ -1258,14 +1302,6 @@ int I210ToARGBMatrix(const uint16_t* src_y,
I210ToARGBRow = I210ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I210TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I210ToARGBRow = I210ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I210ToARGBRow = I210ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@ -1385,6 +1421,254 @@ int U210ToABGR(const uint16_t* src_y,
width, height);
}
LIBYUV_API
int I410ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
void (*I410ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_I410TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I410ToARGBRow = I410ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I410ToARGBRow = I410ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I410TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I410ToARGBRow = I410ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I410ToARGBRow = I410ToARGBRow_AVX2;
}
}
#endif
for (y = 0; y < height; ++y) {
I410ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}
// Convert P010 (10 bit biplanar 4:2:0, samples in the most significant bits)
// to ARGB with the supplied color matrix. The same code is correct for
// P012/P016 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P010ToARGBMatrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Row worker shared with the 4:2:2 path: a single P010 row has the same
  // layout as a P210 row; the 4:2:0 vertical subsampling is handled in the
  // loop below.
  void (*P210ToARGBRow)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_P210TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 8) ? P210ToARGBRow_SSSE3 : P210ToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 16) ? P210ToARGBRow_AVX2 : P210ToARGBRow_Any_AVX2;
  }
#endif
  for (row = 0; row < height; ++row) {
    P210ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // 4:2:0: one UV row serves two luma rows.
    if (row & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Convert P210 (10 bit biplanar 4:2:2, samples in the most significant bits)
// to ARGB with the supplied color matrix. The same code is correct for
// P212/P216 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P210ToARGBMatrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Per-row converter: starts as the portable C path and is promoted to a
  // SIMD variant below when the CPU supports it.
  void (*P210ToARGBRow)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_P210TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 8) ? P210ToARGBRow_SSSE3 : P210ToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToARGBRow =
        IS_ALIGNED(width, 16) ? P210ToARGBRow_AVX2 : P210ToARGBRow_Any_AVX2;
  }
#endif
  // 4:2:2: the UV plane has one row per luma row.
  for (row = 0; row < height; ++row) {
    P210ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_uv += src_stride_uv;
  }
  return 0;
}
// Convert P010 (10 bit biplanar 4:2:0, samples in the most significant bits)
// to AR30 with the supplied color matrix. The same code is correct for
// P012/P016 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P010ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Row worker shared with the 4:2:2 path: a single P010 row has the same
  // layout as a P210 row; the 4:2:0 vertical subsampling is handled in the
  // loop below.
  void (*P210ToAR30Row)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
  if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar30 += (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_P210TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 8) ? P210ToAR30Row_SSSE3 : P210ToAR30Row_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 16) ? P210ToAR30Row_AVX2 : P210ToAR30Row_Any_AVX2;
  }
#endif
  for (row = 0; row < height; ++row) {
    P210ToAR30Row(src_y, src_uv, dst_ar30, yuvconstants, width);
    dst_ar30 += dst_stride_ar30;
    src_y += src_stride_y;
    // 4:2:0: one UV row serves two luma rows.
    if (row & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Convert P210 (10 bit biplanar 4:2:2, samples in the most significant bits)
// to AR30 with the supplied color matrix. The same code is correct for
// P212/P216 since those also keep their significant bits at the top of each
// 16 bit sample.
LIBYUV_API
int P210ToAR30Matrix(const uint16_t* src_y,
                     int src_stride_y,
                     const uint16_t* src_uv,
                     int src_stride_uv,
                     uint8_t* dst_ar30,
                     int dst_stride_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height) {
  int row;
  // Per-row converter: starts as the portable C path and is promoted to a
  // SIMD variant below when the CPU supports it.
  void (*P210ToAR30Row)(
      const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
  if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_ar30 += (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
#if defined(HAS_P210TOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 8) ? P210ToAR30Row_SSSE3 : P210ToAR30Row_Any_SSSE3;
  }
#endif
#if defined(HAS_P210TOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    P210ToAR30Row =
        IS_ALIGNED(width, 16) ? P210ToAR30Row_AVX2 : P210ToAR30Row_Any_AVX2;
  }
#endif
  // 4:2:2: the UV plane has one row per luma row.
  for (row = 0; row < height; ++row) {
    P210ToAR30Row(src_y, src_uv, dst_ar30, yuvconstants, width);
    dst_ar30 += dst_stride_ar30;
    src_y += src_stride_y;
    src_uv += src_stride_uv;
  }
  return 0;
}
// Convert I420 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I420AlphaToARGBMatrix(const uint8_t* src_y,
@ -1903,6 +2187,323 @@ int I444AlphaToABGR(const uint8_t* src_y,
width, height, attenuate);
}
// Convert I010 (10 bit 4:2:0 planar YUV) with Alpha to preattenuated ARGB
// with matrix.
// The alpha plane is read as uint16_t samples (presumably the same 10 bit
// depth as the YUV planes — confirm against the row functions). If
// |attenuate| is non zero, the RGB channels are multiplied by alpha after
// conversion. Negative height inverts the image.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I010AlphaToARGBMatrix(const uint16_t* src_y,
                          int src_stride_y,
                          const uint16_t* src_u,
                          int src_stride_u,
                          const uint16_t* src_v,
                          int src_stride_v,
                          const uint16_t* src_a,
                          int src_stride_a,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          const struct YuvConstants* yuvconstants,
                          int width,
                          int height,
                          int attenuate) {
  int y;
  // Row converter: a single I010 row has the same layout as an I210 row, so
  // the I210 row functions are reused; 4:2:0 vertical subsampling is handled
  // in the loop below.
  void (*I210AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, const uint16_t* a_buf,
                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width) = I210AlphaToARGBRow_C;
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  // Fix: also reject a NULL alpha plane — src_a is dereferenced every row.
  if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I210ALPHATOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I210ALPHATOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                       width);
    if (attenuate) {
      // Premultiply RGB by alpha in place.
      ARGBAttenuateRow(dst_argb, dst_argb, width);
    }
    dst_argb += dst_stride_argb;
    src_a += src_stride_a;
    src_y += src_stride_y;
    // 4:2:0: advance chroma every other luma row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I210 (10 bit 4:2:2 planar YUV) with Alpha to preattenuated ARGB
// with matrix.
// The alpha plane is read as uint16_t samples (presumably the same 10 bit
// depth as the YUV planes — confirm against the row functions). If
// |attenuate| is non zero, the RGB channels are multiplied by alpha after
// conversion. Negative height inverts the image.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I210AlphaToARGBMatrix(const uint16_t* src_y,
                          int src_stride_y,
                          const uint16_t* src_u,
                          int src_stride_u,
                          const uint16_t* src_v,
                          int src_stride_v,
                          const uint16_t* src_a,
                          int src_stride_a,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          const struct YuvConstants* yuvconstants,
                          int width,
                          int height,
                          int attenuate) {
  int y;
  void (*I210AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, const uint16_t* a_buf,
                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width) = I210AlphaToARGBRow_C;
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  // Fix: also reject a NULL alpha plane — src_a is dereferenced every row.
  if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I210ALPHATOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I210ALPHATOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I210AlphaToARGBRow = I210AlphaToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I210AlphaToARGBRow = I210AlphaToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                       width);
    if (attenuate) {
      // Premultiply RGB by alpha in place.
      ARGBAttenuateRow(dst_argb, dst_argb, width);
    }
    dst_argb += dst_stride_argb;
    src_a += src_stride_a;
    src_y += src_stride_y;
    // 4:2:2: chroma advances every luma row.
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I410 with Alpha to preattenuated ARGB with matrix.
// I410 is 10 bit 4:4:4 planar YUV stored in uint16_t samples; the alpha
// plane is full width.  Strides of the 16 bit planes are in uint16_t units
// (pointer arithmetic below is on uint16_t*).  If |attenuate| is non-zero
// each output row is premultiplied by its alpha.
// Returns 0 on success, -1 on invalid argument.
LIBYUV_API
int I410AlphaToARGBMatrix(const uint16_t* src_y,
                          int src_stride_y,
                          const uint16_t* src_u,
                          int src_stride_u,
                          const uint16_t* src_v,
                          int src_stride_v,
                          const uint16_t* src_a,
                          int src_stride_a,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          const struct YuvConstants* yuvconstants,
                          int width,
                          int height,
                          int attenuate) {
  int y;
  void (*I410AlphaToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
                             const uint16_t* v_buf, const uint16_t* a_buf,
                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width) = I410AlphaToARGBRow_C;
  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  // src_a is dereferenced every row below, so it must be checked along with
  // the other planes.
  if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
#if defined(HAS_I410ALPHATOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I410AlphaToARGBRow = I410AlphaToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I410AlphaToARGBRow = I410AlphaToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I410ALPHATOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I410AlphaToARGBRow = I410AlphaToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I410AlphaToARGBRow = I410AlphaToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
    if (IS_ALIGNED(width, 2)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MMI;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    I410AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                       width);
    if (attenuate) {
      ARGBAttenuateRow(dst_argb, dst_argb, width);
    }
    dst_argb += dst_stride_argb;
    src_a += src_stride_a;
    src_y += src_stride_y;
    // 4:4:4: chroma planes are full size, so advance them every row.
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I400 to ARGB with matrix.
LIBYUV_API
int I400ToARGBMatrix(const uint8_t* src_y,

View File

@ -115,6 +115,46 @@ ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C
// Any 4 planes to 1 plane of 8 bit with yuvconstants
// Generates an any-width wrapper around a SIMD row function that only
// handles widths that are a multiple of (MASK + 1): the aligned part of
// the row is processed directly, the remainder through zeroed temp
// buffers so the SIMD function never reads past the source.
//   NAMEANY   - name of the generated wrapper.
//   ANY_SIMD  - the width-restricted SIMD row function.
//   UVSHIFT   - chroma subsampling shift (1 = half-width U/V, 0 = full).
//   DUVSHIFT  - destination subsampling shift (0 = one output per pixel).
//   T         - sample type of the source planes (e.g. uint16_t).
//   SBPP      - bytes per source sample; BPP - bytes per dest pixel.
//   MASK      - width alignment mask of ANY_SIMD.
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants,      \
               int width) {                                                    \
    SIMD_ALIGNED(T temp[16 * 4]);                                              \
    SIMD_ALIGNED(uint8_t out[64]);                                             \
    memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */                    \
    int r = width & MASK;                                                      \
    int n = width & ~MASK;                                                     \
    if (n > 0) {                                                               \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);          \
    }                                                                          \
    memcpy(temp, y_buf + n, r * SBPP);                                         \
    memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);          \
    memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP);          \
    memcpy(temp + 48, a_buf + n, r * SBPP);                                    \
    ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants,         \
             MASK + 1);                                                        \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP);       \
  }

// 10 bit 4:2:2 (I210) and 4:4:4 (I410) YUV with alpha to ARGB wrappers.
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210ALPHATOARGBROW_AVX2
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410ALPHATOARGBROW_AVX2
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY41CT
// Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
@ -327,6 +367,18 @@ ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_SSSE3
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_SSSE3
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_AVX2
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_AVX2
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
@ -546,12 +598,57 @@ ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 2 planes of 16 bit to 1 with yuvconstants
// Generates an any-width wrapper for biplanar (Y + interleaved UV) row
// functions whose SIMD implementation requires widths that are a multiple
// of (MASK + 1).  The remainder pixels are processed through zeroed temp
// buffers.  Parameters are as in ANY41CT; note the UV plane holds sample
// PAIRS, hence the * 2 in the copy below.
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK)      \
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr,              \
               const struct YuvConstants* yuvconstants, int width) {           \
    SIMD_ALIGNED(T temp[16 * 3]);                                              \
    SIMD_ALIGNED(uint8_t out[64]);                                             \
    memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */                    \
    int r = width & MASK;                                                      \
    int n = width & ~MASK;                                                     \
    if (n > 0) {                                                               \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                       \
    }                                                                          \
    memcpy(temp, y_buf + n, r * SBPP);                                         \
    memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
    ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1);                    \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP);       \
  }

// P210/P410 (and P010-style deeper samples) to ARGB / AR30 wrappers.
#ifdef HAS_P210TOAR30ROW_SSSE3
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_SSSE3
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_AVX2
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P210TOAR30ROW_AVX2
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_SSSE3
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_SSSE3
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_AVX2
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_AVX2
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY21CT
// Any 2 16 bit planes with parameter to 1
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
int width) { \
SIMD_ALIGNED(T temp[16 * 4]); \
memset(temp, 0, 16 * 4); /* for msan */ \
memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \

View File

@ -1546,14 +1546,14 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#undef MAKEYUVCONSTANTS
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
// Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel8_8(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1634,13 +1634,13 @@ static __inline void YuvPixel8_16(uint8_t y,
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel16(int16_t y,
int16_t u,
int16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
static __inline void YuvPixel10_16(uint16_t y,
uint16_t u,
uint16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1680,24 +1680,116 @@ static __inline void YuvPixel16(int16_t y,
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
uint16_t u,
uint16_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
// Reads 10 bit YUV, converts through 16 bit fixed point and clamps the
// result down to 8 bit RGB.
static __inline void YuvPixel10_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  int b16;
  int g16;
  int r16;
  YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
  // Shift off the 6 fractional bits of the fixed point result and clamp
  // to [0, 255].
  *b = Clamp(b16 >> 6);
  *g = Clamp(g16 >> 6);
  *r = Clamp(r16 >> 6);
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel16_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  // The conversion constants are laid out differently per architecture to
  // match each SIMD implementation; pick them up accordingly.
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Scale 16 bit luma by the Y gain, keeping the integer part.
  uint32_t y1 = (uint32_t)(y * yg) >> 16;
  // Reduce 16 bit chroma to 8 bit before applying the 8 bit coefficients.
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  // >> 6 removes the fixed point fraction; Clamp limits to [0, 255].
  *b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6);
  *g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6);
  *r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6);
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 16 bit fixed point (caller shifts
// and clamps, e.g. StoreAR30).
static __inline void YuvPixel16_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  // The conversion constants are laid out differently per architecture to
  // match each SIMD implementation; pick them up accordingly.
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Scale 16 bit luma by the Y gain, keeping the integer part.
  uint32_t y1 = (uint32_t)(y * yg) >> 16;
  // Reduce 16 bit chroma to 8 bit before applying the 8 bit coefficients.
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  // Results stay in fixed point; no shift or clamp here.
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
// Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YPixel(uint8_t y,
uint8_t* b,
uint8_t* g,
@ -1730,11 +1822,11 @@ void I444ToARGBRow_C(const uint8_t* src_y,
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
@ -1742,8 +1834,8 @@ void I444ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -1756,8 +1848,8 @@ void I444ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
@ -1776,11 +1868,11 @@ void I422ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@ -1788,8 +1880,8 @@ void I422ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -1803,11 +1895,11 @@ void I210ToARGBRow_C(const uint16_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@ -1815,15 +1907,81 @@ void I210ToARGBRow_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
// 10 bit 4:4:4 planar YUV to ARGB: one chroma sample per luma sample,
// alpha forced fully opaque.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* dst = rgb_buf + i * 4;  // 4 bytes (BGRA) per pixel.
    YuvPixel10_8(src_y[i], src_u[i], src_v[i], dst + 0, dst + 1, dst + 2,
                 yuvconstants);
    dst[3] = 255;  // opaque alpha
  }
}
// 10 bit 4:2:2 planar YUV with alpha to ARGB: each U/V sample covers two
// luma samples.  Alpha is 10 bit; >> 2 scales it to 8 bit.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining = width;
  // Process luma in pairs sharing one chroma sample.
  while (remaining >= 2) {
    YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
    YuvPixel10_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
                 rgb_buf + 6, yuvconstants);
    rgb_buf[7] = clamp255(src_a[1] >> 2);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Odd trailing pixel.
  if (width & 1) {
    YuvPixel10_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
  }
}
// 10 bit 4:4:4 planar YUV with alpha to ARGB.  Alpha is 10 bit; >> 2
// scales it to 8 bit.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* dst = rgb_buf + i * 4;  // 4 bytes (BGRA) per pixel.
    YuvPixel10_8(src_y[i], src_u[i], src_v[i], dst + 0, dst + 1, dst + 2,
                 yuvconstants);
    dst[3] = clamp255(src_a[i] >> 2);
  }
}
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
uint32_t ar30;
b = b >> 4; // convert 10.6 to 10 bit.
b = b >> 4; // convert 8 bit 10.6 to 10 bit.
g = g >> 4;
r = r >> 4;
b = Clamp10(b);
@ -1845,9 +2003,9 @@ void I210ToAR30Row_C(const uint16_t* src_y,
int g;
int r;
for (x = 0; x < width - 1; x += 2) {
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf + 4, b, g, r);
src_y += 2;
src_u += 1;
@ -1855,11 +2013,113 @@ void I210ToAR30Row_C(const uint16_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
StoreAR30(rgb_buf, b, g, r);
}
}
// 10 bit 4:4:4 planar YUV to AR30 (2:10:10:10): one fixed point
// conversion per pixel, packed by StoreAR30.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b16;
    int g16;
    int r16;
    YuvPixel10_16(src_y[i], src_u[i], src_v[i], &b16, &g16, &r16,
                  yuvconstants);
    StoreAR30(rgb_buf + i * 4, b16, g16, r16);
  }
}
// Biplanar 4:2:2 16 bit (P210; also handles 10/12 bit samples stored in
// the high bits) to ARGB.  UV is interleaved; each pair covers two luma
// samples.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // opaque alpha
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
                 rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;  // opaque alpha
    src_y += 2;
    src_uv += 2;  // one interleaved U/V pair consumed per 2 pixels.
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Odd trailing pixel.
  if (width & 1) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
                 rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
// Biplanar 4:4:4 16 bit (P410) to ARGB: one interleaved U/V pair per
// pixel, alpha forced fully opaque.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* dst = rgb_buf + i * 4;  // 4 bytes (BGRA) per pixel.
    YuvPixel16_8(src_y[i], src_uv[i * 2], src_uv[i * 2 + 1], dst + 0, dst + 1,
                 dst + 2, yuvconstants);
    dst[3] = 255;  // opaque alpha
  }
}
// Biplanar 4:2:2 16 bit (P210) to AR30 (2:10:10:10).  UV is interleaved;
// each pair covers two luma samples.  Fixed point results are packed by
// StoreAR30.
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_uv += 2;  // one interleaved U/V pair consumed per 2 pixels.
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Odd trailing pixel.
  if (width & 1) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
// Biplanar 4:4:4 16 bit (P410) to AR30 (2:10:10:10): one interleaved U/V
// pair per pixel, packed by StoreAR30.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b16;
    int g16;
    int r16;
    YuvPixel16_16(src_y[i], src_uv[i * 2], src_uv[i * 2 + 1], &b16, &g16,
                  &r16, yuvconstants);
    StoreAR30(rgb_buf + i * 4, b16, g16, r16);
  }
}
// 8 bit YUV to 10 bit AR30
// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
void I422ToAR30Row_C(const uint8_t* src_y,
@ -1903,11 +2163,11 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
YuvPixel8_8(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = src_a[0];
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
YuvPixel8_8(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = src_a[1];
src_y += 2;
src_u += 2;
@ -1916,8 +2176,8 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
}
}
@ -1931,8 +2191,8 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
src_y += 1;
src_u += 1;
@ -1952,11 +2212,11 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = src_a[1];
src_y += 2;
src_u += 1;
@ -1965,8 +2225,8 @@ void I422AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
}
}
@ -1979,18 +2239,18 @@ void I422ToRGB24Row_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
@ -2008,8 +2268,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@ -2024,7 +2284,7 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
dst_argb4444 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@ -2046,8 +2306,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@ -2062,7 +2322,7 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
dst_argb1555 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@ -2084,8 +2344,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2100,7 +2360,7 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2115,19 +2375,19 @@ void NV12ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_uv += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2139,19 +2399,19 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_vu += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2163,17 +2423,17 @@ void NV12ToRGB24Row_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_uv += 2;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
@ -2184,17 +2444,17 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
rgb_buf + 5, yuvconstants);
src_y += 2;
src_vu += 2;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
}
}
@ -2211,8 +2471,8 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
uint8_t r1;
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2226,7 +2486,7 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
YuvPixel8_8(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -2240,18 +2500,18 @@ void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_yuy2 += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2262,18 +2522,18 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
YuvPixel8_8(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_uyvy += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
YuvPixel8_8(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@ -2286,11 +2546,11 @@ void I422ToRGBARow_C(const uint8_t* src_y,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
rgb_buf + 7, yuvconstants);
YuvPixel8_8(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
rgb_buf + 7, yuvconstants);
rgb_buf[4] = 255;
src_y += 2;
src_u += 1;
@ -2298,8 +2558,8 @@ void I422ToRGBARow_C(const uint8_t* src_y,
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
YuvPixel8_8(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
}
}

View File

@ -1944,13 +1944,63 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x8(%[u_buf]),%[u_buf] \n" \
"punpcklwd %%xmm1,%%xmm0 \n" \
"psraw $0x2,%%xmm0 \n" \
"psraw $2,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $0x6,%%xmm4 \n" \
"psllw $6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 210 (10 bit 4:2:2), upsample to 8 UV. With 8 Alpha.
// NOTE: callers rebase v_buf to the v-u byte offset ("sub %[u_buf],%[v_buf]")
// so both planes are addressed off u_buf.  psraw $2 narrows 10 bit UV/A to
// 8 bit; psllw $6 scales 10 bit Y up to 16 bit.
#define READYUVA210 \
"movq (%[u_buf]),%%xmm0 \n" \
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x8(%[u_buf]),%[u_buf] \n" \
"punpcklwd %%xmm1,%%xmm0 \n" \
"psraw $2,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
"packuswb %%xmm5,%%xmm5 \n" \
"lea 0x10(%[a_buf]),%[a_buf] \n"
// Read 8 UV from 444 10 bit
#define READYUV410 \
"movdqu (%[u_buf]),%%xmm0 \n" \
"movdqu 0x00(%[u_buf],%[v_buf],1),%%xmm2 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"psraw $2,%%xmm0 \n" \
"psraw $2,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm1 \n" \
"punpcklwd %%xmm2,%%xmm0 \n" \
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 444 10 bit. With 8 Alpha.
#define READYUVA410 \
"movdqu (%[u_buf]),%%xmm0 \n" \
"movdqu 0x00(%[u_buf],%[v_buf],1),%%xmm2 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"psraw $2,%%xmm0 \n" \
"psraw $2,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm1 \n" \
"punpcklwd %%xmm2,%%xmm0 \n" \
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"psllw $0x6,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
"packuswb %%xmm5,%%xmm5 \n" \
"lea 0x10(%[a_buf]),%[a_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
#define READYUVA422 \
"movd (%[u_buf]),%%xmm0 \n" \
@ -2010,6 +2060,27 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
"pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
"lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n"
// Read 4 UV from P210, upsample to 8 UV
// P210 stores samples MSB-aligned in 16 bit words, so only the top byte
// is kept (psrlw $8).  This also works for 12 and 16 bit MSB-aligned input.
#define READP210 \
"movdqu (%[uv_buf]),%%xmm0 \n" \
"lea 0x10(%[uv_buf]),%[uv_buf] \n" \
"psrlw $0x8,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from P410
// 4:4:4 layout: no upsampling, just narrow both UV registers to bytes.
#define READP410 \
"movdqu (%[uv_buf]),%%xmm0 \n" \
"movdqu 0x10(%[uv_buf]),%%xmm1 \n" \
"lea 0x20(%[uv_buf]),%[uv_buf] \n" \
"psrlw $0x8,%%xmm0 \n" \
"psrlw $0x8,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm0 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
#if defined(__x86_64__)
#define YUVTORGB_SETUP(yuvconstants) \
"movdqa (%[yuvconstants]),%%xmm8 \n" \
@ -2362,6 +2433,146 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
);
}
// 10 bit YUV to ARGB
// I410: 10 bit 4:4:4 planar YUV.  Converts 8 pixels per loop iteration
// ("sub $0x8").  xmm5 is set to all ones, presumably the opaque alpha
// consumed by STOREARGB — confirm against the STOREARGB macro.
void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
"pcmpeqb %%xmm5,%%xmm5 \n" // 0xff alpha
LABELALIGN
"1: \n"
READYUV410
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
// 10 bit YUVA to ARGB
// I210 with alpha plane: 4:2:2 10 bit Y/U/V/A.  8 pixels per iteration.
// Alpha is loaded into xmm5 by READYUVA210.
void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA210
YUVTORGB(yuvconstants)
STOREARGB
// 32-bit subl: on i386 width is constrained "+m" (see below) because
// too few registers remain for a "+rm" operand.
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf),
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I210ALPHATOARGBROW_SSSE3
#ifdef HAS_I410ALPHATOARGBROW_SSSE3
// 10 bit YUVA to ARGB
// I410 with alpha plane: 4:4:4 10 bit Y/U/V/A.  8 pixels per iteration.
// Alpha is loaded into xmm5 by READYUVA410.
void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA410
YUVTORGB(yuvconstants)
STOREARGB
// 32-bit subl: width may be a memory operand on i386 (see constraints).
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf),
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I410ALPHATOARGBROW_SSSE3
// 10 bit YUV to AR30
// I410 (4:4:4 10 bit) to 2:10:10:10 AR30.  xmm5/xmm6/xmm7 hold the alpha
// bits and the 0..1023 clamp range used by STOREAR30; all three are listed
// in the clobbers.
void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN
"1: \n"
READYUV410
YUVTORGB16(yuvconstants)
STOREAR30
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
@ -2513,6 +2724,123 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
// clang-format on
}
// P210 biplanar YUV to ARGB.
// UV is interleaved in one plane; samples are MSB-aligned in 16 bit words
// (READP210 keeps only the top byte), so 12 and 16 bit input also works.
// 8 pixels per loop iteration; xmm5 = all ones for opaque alpha.
void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READP210
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[uv_buf] "+r"(uv_buf), // %[uv_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+rm"(width) // %[width]
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
// P410 biplanar 4:4:4 YUV to ARGB.
// One UV pair per pixel, MSB-aligned 16 bit samples (12/16 bit also works).
// 8 pixels per loop iteration; xmm5 = all ones for opaque alpha.
void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READP410
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[uv_buf] "+r"(uv_buf), // %[uv_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+rm"(width) // %[width]
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
"xmm5");
}
// P210 biplanar YUV to 2:10:10:10 AR30.
// xmm5/xmm6/xmm7 hold the 2 alpha bits and the 0..1023 clamp range
// used by STOREAR30.  8 pixels per loop iteration.
void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN
"1: \n"
READP210
YUVTORGB16(yuvconstants)
STOREAR30
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
// P410 biplanar 4:4:4 YUV to 2:10:10:10 AR30.
// Same constant setup as P210ToAR30Row_SSSE3; 8 pixels per iteration.
void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
"psllw $4,%%xmm5 \n" // 2 alpha bits
"pxor %%xmm6,%%xmm6 \n"
"pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
"psrlw $6,%%xmm7 \n" // 1023 for max
LABELALIGN
"1: \n"
READP410
YUVTORGB16(yuvconstants)
STOREAR30
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@ -2570,7 +2898,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 210 10 bit, upsample to 16 UV
// Read 8 UV from 210, upsample to 16 UV
// TODO(fbarchard): Consider vshufb to replace pack/unpack
// TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1.
#define READYUV210_AVX2 \
@ -2580,13 +2908,64 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
"vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
"vpsraw $0x2,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $0x6,%%ymm4,%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 210, upsample to 16 UV. With 16 Alpha.
// vpsraw $2 narrows 10 bit UV/A to 8 bit; vpsllw $6 scales Y to 16 bit.
#define READYUVA210_AVX2 \
"vmovdqu (%[u_buf]),%%xmm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
"lea 0x10(%[u_buf]),%[u_buf] \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm1,%%ymm1 \n" \
"vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
"vpackuswb %%ymm5,%%ymm5,%%ymm5 \n" \
"lea 0x20(%[a_buf]),%[a_buf] \n"
// Read 16 UV from 410
// 4:4:4: both halves are narrowed and interleaved; no duplication needed.
#define READYUV410_AVX2 \
"vmovdqu (%[u_buf]),%%ymm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%ymm2 \n" \
"lea 0x20(%[u_buf]),%[u_buf] \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm2,%%ymm2 \n" \
"vpunpckhwd %%ymm2,%%ymm0,%%ymm1 \n" \
"vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from 410. With 16 Alpha.
#define READYUVA410_AVX2 \
"vmovdqu (%[u_buf]),%%ymm0 \n" \
"vmovdqu 0x00(%[u_buf],%[v_buf],1),%%ymm2 \n" \
"lea 0x20(%[u_buf]),%[u_buf] \n" \
"vpsraw $2,%%ymm0,%%ymm0 \n" \
"vpsraw $2,%%ymm2,%%ymm2 \n" \
"vpunpckhwd %%ymm2,%%ymm0,%%ymm1 \n" \
"vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
"vpackuswb %%ymm5,%%ymm5,%%ymm5 \n" \
"lea 0x20(%[a_buf]),%[a_buf] \n"
// Read 16 UV from 444. With 16 Alpha.
#define READYUVA444_AVX2 \
"vmovdqu (%[u_buf]),%%xmm0 \n" \
@ -2641,6 +3020,28 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from P210, upsample to 16 UV
// (AVX2 loads 32 bytes = 8 UV pairs; the SSE comment count does not apply.)
// P210 samples are MSB-aligned: vpsrlw $8 keeps the top byte, so 12 and
// 16 bit MSB-aligned input also works.
#define READP210_AVX2 \
"vmovdqu (%[uv_buf]),%%ymm0 \n" \
"lea 0x20(%[uv_buf]),%[uv_buf] \n" \
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from P410
// (64 bytes = 16 UV pairs; 4:4:4, no upsampling.)
#define READP410_AVX2 \
"vmovdqu (%[uv_buf]),%%ymm0 \n" \
"vmovdqu 0x20(%[uv_buf]),%%ymm1 \n" \
"lea 0x40(%[uv_buf]),%[uv_buf] \n" \
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
"vpsrlw $0x8,%%ymm1,%%ymm1 \n" \
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
#define READYUY2_AVX2 \
"vmovdqu (%[yuy2_buf]),%%ymm4 \n" \
@ -2934,6 +3335,165 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
}
#endif // HAS_I210TOAR30ROW_AVX2
#if defined(HAS_I410TOARGBROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
// ymm5 = all ones, presumably the opaque alpha used by STOREARGB_AVX2.
void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV410_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I410TOARGBROW_AVX2
#if defined(HAS_I210ALPHATOARGBROW_AVX2)
// 16 pixels
// 8 UV, 16 Y and 16 A producing 16 ARGB (64 bytes).
void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA210_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
// 32-bit subl: width may be a memory operand on i386 (see constraints).
"subl $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf), // %[a_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5"
);
}
#endif // HAS_I210ALPHATOARGBROW_AVX2
#if defined(HAS_I410ALPHATOARGBROW_AVX2)
// 16 pixels
// 16 UV, 16 Y and 16 A producing 16 ARGB (64 bytes).
void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" // v_buf becomes v-u offset
LABELALIGN
"1: \n"
READYUVA410_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
// 32-bit subl: width may be a memory operand on i386 (see constraints).
"subl $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf] "+r"(y_buf), // %[y_buf]
[u_buf] "+r"(u_buf), // %[u_buf]
[v_buf] "+r"(v_buf), // %[v_buf]
[a_buf] "+r"(a_buf), // %[a_buf]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
#if defined(__i386__)
[width]"+m"(width) // %[width]
#else
[width]"+rm"(width) // %[width]
#endif
: [yuvconstants] "r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5"
);
}
#endif // HAS_I410ALPHATOARGBROW_AVX2
#if defined(HAS_I410TOAR30ROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y producing 16 AR30 (64 bytes).
// ymm5 holds the 2 alpha bits; ymm6/ymm7 hold the 0..1023 clamp range
// used by STOREAR30_AVX2.
void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READYUV410_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// xmm6/xmm7 added to the clobbers: the asm writes ymm6 (vpxor) and
// ymm7 (vpcmpeqb/vpsrlw) above, matching the SSSE3 and P210/P410
// AR30 variants.  Omitting a written register from the clobber list
// is undefined behavior in GCC extended asm.
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_I410TOAR30ROW_AVX2
#if defined(HAS_I444ALPHATOARGBROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y and 16 A producing 16 ARGB.
@ -3193,6 +3753,148 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
}
#endif // HAS_UYVYTOARGBROW_AVX2
#if defined(HAS_P210TOARGBROW_AVX2)
// 16 pixels.
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
// P210 samples are MSB-aligned, so 12/16 bit input also works.
void OMITFP P210ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // opaque alpha
LABELALIGN
"1: \n"
READP210_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Fixed: clobber list previously contained "xmm0" twice.
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
#endif // HAS_P210TOARGBROW_AVX2
#if defined(HAS_P410TOARGBROW_AVX2)
// 16 pixels.
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
// (P410 is 4:4:4 — READP410_AVX2 does no upsampling; the previous comment
// was copied from the P210 variant.)
void OMITFP P410ToARGBRow_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
// clang-format off
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // opaque alpha
LABELALIGN
"1: \n"
READP410_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Fixed: clobber list previously contained "xmm0" twice.
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
// clang-format on
}
#endif // HAS_P410TOARGBROW_AVX2
#if defined(HAS_P210TOAR30ROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, with 16 Y producing 16 AR30 (64 bytes).
// (READP210_AVX2 loads 8 UV pairs and duplicates them.)
void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READP210_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_P210TOAR30ROW_AVX2
#if defined(HAS_P410TOAR30ROW_AVX2)
// 16 pixels
// 16 UV values with 16 Y producing 16 AR30 (64 bytes).
// Same AR30 constant setup as P210ToAR30Row_AVX2 (alpha bits, clamp range).
void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf,
const uint16_t* uv_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
"vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
"vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
"vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
"vpsrlw $6,%%ymm7,%%ymm7 \n"
LABELALIGN
"1: \n"
READP410_AVX2
YUVTORGB16_AVX2(yuvconstants)
STOREAR30_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // HAS_P410TOAR30ROW_AVX2
#ifdef HAS_I400TOARGBROW_SSE2
void I400ToARGBRow_SSE2(const uint8_t* y_buf,
uint8_t* dst_argb,

View File

@ -55,13 +55,13 @@ namespace libyuv {
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \
"SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \
"SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \
"DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \
"DST_SUBSAMP_Y unsupported"); \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
@ -928,6 +928,8 @@ TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
@ -938,6 +940,8 @@ TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
@ -948,6 +952,8 @@ TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
@ -3162,89 +3168,441 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
#endif // HAS_ABGRTOAR30ROW_AVX2
// TODO(fbarchard): Fix clamping issue affected by U channel.
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff); \
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN) \
YALIGN, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_ - 4, _Any, +, 0, 0) \
YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Unaligned, +, 1, 1) \
YALIGN, benchmark_width_, _Unaligned, +, 1, 1, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Invert, -, 0, 0) \
YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0, 0)
YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U010, 2, 2, ABGR, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, ARGB, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, ABGR, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1)
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1)
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1)
// Convenience wrappers binding 10 bit 4:4:4 planar (x410) conversions to a
// fixed colorspace matrix: I = BT.601, H = BT.709, U = BT.2020.
// ARGB/ABGR produce 8 bit output; AR30/AB30 produce 10 bit 2:10:10:10 output.
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToABGR(a, b, c, d, e, f, g, h, i, j) \
  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define H410ToARGB(a, b, c, d, e, f, g, h, i, j) \
  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define H410ToABGR(a, b, c, d, e, f, g, h, i, j) \
  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define U410ToARGB(a, b, c, d, e, f, g, h, i, j) \
  I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
#define U410ToABGR(a, b, c, d, e, f, g, h, i, j) \
  I410ToABGRMatrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
#define I410ToAR30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToAB30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define H410ToAR30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define H410ToAB30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvH709Constants, i, j)
#define U410ToAR30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
#define U410ToAB30(a, b, c, d, e, f, g, h, i, j) \
  I410ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuv2020Constants, i, j)
// 10 bit planar YUV to 8 bit ARGB/ABGR tests. Arguments are: format,
// horizontal/vertical chroma subsampling, destination format, destination
// bytes-per-pixel, alignment, row alignment, and source bit depth (10),
// which bounds the random sample values generated by the test macro.
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, ABGR, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, ARGB, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, ABGR, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
// Removed: stale AR30/AB30 TESTPLANAR16TOB invocations missing the S_DEPTH
// argument required by the updated macro; the depth-10 equivalents are
// instantiated immediately below, so these were both a macro-arity error and
// duplicate TEST_F definitions.
// 10 bit planar YUV to 10 bit AR30/AB30 tests. Guarded by
// LITTLE_ENDIAN_ONLY_TEST because AR30 is a packed little-endian 2:10:10:10
// layout that the byte-wise comparison assumes.
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(U010, 2, 2, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(I210, 2, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(H210, 2, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(U210, 2, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(I410, 1, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(H410, 1, 1, AB30, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, AR30, 4, 4, 1, 10)
TESTPLANAR16TOB(U410, 1, 1, AB30, 4, 4, 1, 10)
#endif
#define TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, OFF, ATTEN, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \
align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_a + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_u + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
reinterpret_cast<uint16_t*>(src_v + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
} \
memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \
dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \
ATTEN); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B( \
reinterpret_cast<uint16_t*>(src_y + OFF), kWidth, \
reinterpret_cast<uint16_t*>(src_u + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_v + OFF), kStrideUV, \
reinterpret_cast<uint16_t*>(src_a + OFF), kWidth, \
dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \
free_aligned_buffer_page_end(src_a); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
// Expands TESTQPLANAR16TOBI into the standard five test variants:
// _Any (odd width), _Unaligned (offset buffers), _Invert (negative height),
// _Opt (aligned fast path), and _Premult (alpha attenuation enabled).
#define TESTQPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
                         ALIGN, YALIGN, S_DEPTH)                            \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH)   \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Unaligned, +, 1, 0, S_DEPTH) \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)       \
  TESTQPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                    YALIGN, benchmark_width_, _Premult, +, 0, 1, S_DEPTH)
// Colorspace-bound wrappers for 10 bit 4:2:0 planar YUV-with-alpha (x010):
// I = BT.601, J = JPEG (full range 601), F = full-range BT.709,
// H = BT.709, U = BT.2020, V = full-range BT.2020.
#define I010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define I010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define J010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define F010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define F010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define H010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define U010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define V010AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I010AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
#define V010AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
// Colorspace-bound wrappers for 10 bit 4:2:2 planar YUV-with-alpha (x210);
// same matrix naming scheme as the x010 wrappers above (I/J/F/H/U/V).
#define I210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define I210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define J210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define F210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define F210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define H210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define U210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define V210AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I210AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
#define V210AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
// Colorspace-bound wrappers for 10 bit 4:4:4 planar YUV-with-alpha (x410);
// same matrix naming scheme as the x010 wrappers above (I/J/F/H/U/V).
#define I410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define I410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvI601Constants, k, \
                        l, m)
#define J410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define F410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define F410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
                        l, m)
#define H410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define U410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define V410AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I410AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
#define V410AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)                \
  I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
                        l, m)
// 10 bit planar YUV-with-alpha tests across 4:2:0 (x010), 4:2:2 (x210) and
// 4:4:4 (x410) subsampling and all six colorspace variants; the trailing 10
// is the source bit depth used for random sample generation.
TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(I010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(J010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(J010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(H010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(H010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(F010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(F010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(U010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(U010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(V010Alpha, 2, 2, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(V010Alpha, 2, 2, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(J210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(J210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(H210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(H210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(F210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(F210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(U210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(U210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(V210Alpha, 2, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(V210Alpha, 2, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(I410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(I410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(J410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(J410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(H410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(H410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(F410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(F410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(U410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(U410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
TESTQPLANAR16TOB(V410Alpha, 1, 1, ARGB, 4, 4, 1, 10)
TESTQPLANAR16TOB(V410Alpha, 1, 1, ABGR, 4, 4, 1, 10)
#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
reinterpret_cast<uint16_t*>(src_uv + SOFF)[i] = \
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
} \
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_uv + SOFF), \
kStrideUV, dst_argb_c + DOFF, kStrideB, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
reinterpret_cast<uint16_t*>(src_uv + SOFF), \
kStrideUV, dst_argb_opt + DOFF, kStrideB, kWidth, \
NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
// Expands TESTBIPLANAR16TOBI into the standard four test variants:
// _Any (odd width), _Unaligned (offset src and dst), _Invert (negative
// height), and _Opt (aligned fast path).
#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,    \
                          ALIGN, YALIGN, S_DEPTH)                            \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_ - 4, _Any, +, 0, 0, S_DEPTH)   \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_, _Unaligned, +, 1, 1, S_DEPTH) \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH)    \
  TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                     YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
// Wrappers binding biplanar (P010/P210 10 bit, P012/P212 12 bit,
// P016/P216 16 bit; x0xx = 4:2:0, x2xx = 4:2:2) conversions to the BT.709
// matrix, for both 8 bit ARGB and 10 bit AR30 destinations.
#define P010ToARGB(a, b, c, d, e, f, g, h) \
  P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P210ToARGB(a, b, c, d, e, f, g, h) \
  P210ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P010ToAR30(a, b, c, d, e, f, g, h) \
  P010ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P210ToAR30(a, b, c, d, e, f, g, h) \
  P210ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P012ToARGB(a, b, c, d, e, f, g, h) \
  P012ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P212ToARGB(a, b, c, d, e, f, g, h) \
  P212ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P012ToAR30(a, b, c, d, e, f, g, h) \
  P012ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P212ToAR30(a, b, c, d, e, f, g, h) \
  P212ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P016ToARGB(a, b, c, d, e, f, g, h) \
  P016ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P216ToARGB(a, b, c, d, e, f, g, h) \
  P216ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P016ToAR30(a, b, c, d, e, f, g, h) \
  P016ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
#define P216ToAR30(a, b, c, d, e, f, g, h) \
  P216ToAR30Matrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
// Biplanar 10/12/16 bit to ARGB tests; the trailing argument is the source
// bit depth, which sets the MSB-aligned mask used for random samples.
TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
// AR30 output is a packed little-endian layout, so these byte-compared
// tests are restricted to little-endian builds.
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
#endif
static int Clamp(int y) {