I400ToARGBMatrix: Pass a color matrix to use different coefficients

32 bit
NEON I400ToARGB_Opt (1937 ms)
64 bit
C I400ToARGB_Opt (8957 ms)
NEON I400ToARGB_Opt (2147 ms)

x86
C I400ToARGB_Opt (1110 ms)
AVX2 I400ToARGB_Opt (213 ms)
SSE2 I400ToARGB_Opt (225 ms)

Bug: libyuv:861, b/156642185
Change-Id: I96b6f4ebba6ff9c4ed8803291ce098de6f93fa4f
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2209718
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2020-05-20 04:53:55 -07:00 committed by Commit Bot
parent d426247a3b
commit da41bca02b
14 changed files with 320 additions and 137 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1755
Version: 1756
License: BSD
License File: LICENSE

View File

@ -1548,6 +1548,16 @@ int I420ToAR30Matrix(const uint8_t* src_y,
int width,
int height);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
// Matrix variant: yuvconstants selects the Y scale/bias coefficients
// (e.g. kYuvI601Constants, kYuvJPEGConstants, kYuvH709Constants,
// kYuv2020Constants) so limited-range or full-range luma is expanded
// correctly. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I400ToARGBMatrix(const uint8_t* src_y,
                     int src_stride_y,
                     uint8_t* dst_argb,
                     int dst_stride_argb,
                     const struct YuvConstants* yuvconstants,
                     int width,
                     int height);
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "sample_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.

View File

@ -98,7 +98,6 @@ extern "C" {
#define HAS_COPYROW_SSE2
#define HAS_H422TOARGBROW_SSSE3
#define HAS_HALFFLOATROW_SSE2
#define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
@ -201,7 +200,6 @@ extern "C" {
#define HAS_H422TOARGBROW_AVX2
#define HAS_HALFFLOATROW_AVX2
// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
#define HAS_I400TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2
@ -275,6 +273,7 @@ extern "C" {
#define HAS_HALFMERGEUVROW_SSSE3
#define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGERGBROW_SSSE3
#define HAS_MIRRORUVROW_AVX2
@ -303,6 +302,7 @@ extern "C" {
#define HAS_HALFMERGEUVROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2
#define HAS_I400TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
@ -693,6 +693,7 @@ struct YuvConstants {
int16_t kUVBiasG[16];
int16_t kUVBiasR[16];
int16_t kYToRgb[16];
int16_t kYBiasToRgb[16];
};
// Offsets into YuvConstants structure
@ -703,6 +704,8 @@ struct YuvConstants {
#define KUVBIASG 128
#define KUVBIASR 160
#define KYTORGB 192
#define KYBIASTORGB 224
#endif
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
@ -2796,23 +2799,50 @@ void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width);
void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width);
void I400ToARGBRow_C(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_SSE2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_MMI(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// ARGB preattenuated alpha blend.
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1755
#define LIBYUV_VERSION 1756
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -1559,16 +1559,18 @@ int I420AlphaToABGR(const uint8_t* src_y,
width, height, attenuate);
}
// Convert I400 to ARGB.
// Convert I400 to ARGB with matrix.
LIBYUV_API
int I400ToARGB(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int I400ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) =
void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I400ToARGBRow_C;
if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1;
@ -1627,13 +1629,25 @@ int I400ToARGB(const uint8_t* src_y,
#endif
for (y = 0; y < height; ++y) {
I400ToARGBRow(src_y, dst_argb, width);
I400ToARGBRow(src_y, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
}
return 0;
}
// Convert I400 to ARGB.
// Thin wrapper that preserves the historical I400ToARGB behavior by
// forwarding to I400ToARGBMatrix with the BT.601 (I601) constants.
// Returns 0 on success, -1 on invalid arguments (via the Matrix version).
LIBYUV_API
int I400ToARGB(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  return I400ToARGBMatrix(src_y, src_stride_y, dst_argb, dst_stride_argb,
                          &kYuvI601Constants, width, height);
}
// Convert J400 to ARGB.
LIBYUV_API
int J400ToARGB(const uint8_t* src_y,

View File

@ -547,12 +547,6 @@ ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
@ -582,7 +576,6 @@ ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
#endif
#if defined(HAS_ARGBTORGB24ROW_MSA)
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
@ -591,7 +584,6 @@ ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_MMI)
ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3)
@ -600,7 +592,6 @@ ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3)
ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3)
ANY11(I400ToARGBRow_Any_MMI, I400ToARGBRow_MMI, 0, 1, 4, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
@ -920,6 +911,47 @@ ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7)
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
}
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11P(I400ToARGBRow_Any_SSE2,
I400ToARGBRow_SSE2,
const struct YuvConstants*,
1,
4,
7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11P(I400ToARGBRow_Any_AVX2,
I400ToARGBRow_AVX2,
const struct YuvConstants*,
1,
4,
15)
#endif
#if defined(HAS_I400TOARGBROW_NEON)
ANY11P(I400ToARGBRow_Any_NEON,
I400ToARGBRow_NEON,
const struct YuvConstants*,
1,
4,
7)
#endif
#if defined(HAS_I400TOARGBROW_MSA)
ANY11P(I400ToARGBRow_Any_MSA,
I400ToARGBRow_MSA,
const struct YuvConstants*,
1,
4,
15)
#endif
#if defined(HAS_I400TOARGBROW_MMI)
ANY11P(I400ToARGBRow_Any_MMI,
I400ToARGBRow_MMI,
const struct YuvConstants*,
1,
4,
7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
ARGBToRGB565DitherRow_SSE2,

View File

@ -1353,26 +1353,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__) // 32 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
@ -1384,7 +1384,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
@ -1395,7 +1397,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
@ -1434,26 +1438,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
@ -1465,7 +1469,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
@ -1476,7 +1482,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
@ -1517,26 +1525,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
@ -1548,7 +1556,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
@ -1559,7 +1569,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
@ -1598,26 +1610,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
{UG, VG, UG, VG, UG, VG, UG, VG},
{UG, VG, UG, VG, UG, VG, UG, VG},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
{VG, UG, VG, UG, VG, UG, VG, UG},
{VG, UG, VG, UG, VG, UG, VG, UG},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
{BB, BG, BR, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BB, BG, BR, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
{BR, BG, BB, 0, 0, 0, 0, 0},
{0x0101 * YG, 0, 0, 0}};
{BR, BG, BB, YGB, 0, 0, 0, 0},
{0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
@ -1629,7 +1641,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
@ -1640,7 +1654,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
YGB}};
#endif
#undef BB
@ -1655,7 +1671,6 @@ const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel(uint8_t y,
uint8_t u,
uint8_t v,
@ -1671,7 +1686,7 @@ static __inline void YuvPixel(uint8_t y,
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1680,7 +1695,7 @@ static __inline void YuvPixel(uint8_t y,
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
@ -1714,7 +1729,7 @@ static __inline void YuvPixel8_16(uint8_t y,
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1723,7 +1738,7 @@ static __inline void YuvPixel8_16(uint8_t y,
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
@ -1758,7 +1773,7 @@ static __inline void YuvPixel16(int16_t y,
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
@ -1767,7 +1782,7 @@ static __inline void YuvPixel16(int16_t y,
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
@ -1805,21 +1820,26 @@ static __inline void YuvPixel10(uint16_t y,
*r = Clamp(r16 >> 6);
}
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
// C reference code that mimics the YUV assembly.
static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) {
uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16;
*b = Clamp((int32_t)(y1 + YGB) >> 6);
*g = Clamp((int32_t)(y1 + YGB) >> 6);
*r = Clamp((int32_t)(y1 + YGB) >> 6);
// C reference: expand one 8 bit grey (Y) sample to B, G, R using the scale
// (yg) and bias (ygb) taken from the supplied YuvConstants. The constants
// are laid out differently per architecture, hence the #if below.
static __inline void YPixel(uint8_t y,
                            uint8_t* b,
                            uint8_t* g,
                            uint8_t* r,
                            const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__)
  int ygb = yuvconstants->kUVBiasBGR[3];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ygb = yuvconstants->kYBiasToRgb[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Replicate y into 16 bits (y * 0x0101), scale, take the high half,
  // apply the bias and shift back down to 8 bits; all channels get the
  // same grey value.
  uint32_t scaled = (uint32_t)(y * 0x0101 * yg) >> 16;
  int grey = Clamp(((int32_t)(scaled) + ygb) >> 6);
  *b = (uint8_t)grey;
  *g = (uint8_t)grey;
  *r = (uint8_t)grey;
}
#undef YG
#undef YGB
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
@ -2353,18 +2373,21 @@ void I422ToRGBARow_C(const uint8_t* src_y,
}
}
void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
// Convert one row of I400 (grey) to ARGB using the given color matrix.
// Writes width * 4 bytes; alpha is forced to 255.
void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  const uint8_t* y = src_y;
  uint8_t* dst = rgb_buf;
  int x;
  // Main loop handles two pixels (8 output bytes) per iteration.
  for (x = 0; x + 2 <= width; x += 2) {
    YPixel(y[0], dst + 0, dst + 1, dst + 2, yuvconstants);
    dst[3] = 255;
    YPixel(y[1], dst + 4, dst + 5, dst + 6, yuvconstants);
    dst[7] = 255;
    y += 2;
    dst += 8;  // Advance 2 pixels.
  }
  // Trailing odd pixel, if any.
  if (width & 1) {
    YPixel(y[0], dst + 0, dst + 1, dst + 2, yuvconstants);
    dst[3] = 255;
  }
}

View File

@ -3089,16 +3089,14 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
#endif // HAS_UYVYTOARGBROW_AVX2
#ifdef HAS_I400TOARGBROW_SSE2
void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
void I400ToARGBRow_SSE2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
"mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
"movd %%eax,%%xmm2 \n"
"pshufd $0x0,%%xmm2,%%xmm2 \n"
"mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 *
// 16
"movd %%eax,%%xmm3 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"movdqa 192(%3),%%xmm2 \n" // yg = 18997 = 1.164
"movdqa 224(%3),%%xmm3 \n" // ygb = 1160 = 1.164 * 16
"pcmpeqb %%xmm4,%%xmm4 \n" // 0xff000000
"pslld $0x18,%%xmm4 \n"
LABELALIGN
@ -3108,8 +3106,8 @@ void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
"lea 0x8(%0),%0 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
"pmulhuw %%xmm2,%%xmm0 \n"
"psubusw %%xmm3,%%xmm0 \n"
"psrlw $6, %%xmm0 \n"
"paddsw %%xmm3,%%xmm0 \n"
"psraw $6, %%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
// Step 2: Weave into ARGB
@ -3125,27 +3123,26 @@ void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(dst_argb), // %1
"+rm"(width) // %2
:
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
: "+r"(y_buf), // %0
"+r"(dst_argb), // %1
"+rm"(width) // %2
: "r"(yuvconstants) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif // HAS_I400TOARGBROW_SSE2
#ifdef HAS_I400TOARGBROW_AVX2
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
// note: vpunpcklbw mutates and vpackuswb unmutates.
void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
void I400ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
"mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 *
"vmovdqa 192(%3),%%ymm2 \n" // yg = 18997 = 1.164
"vmovdqa 224(%3),%%ymm3 \n" // ygb = -1160 = 1.164 *
// 16
"vmovd %%eax,%%xmm2 \n"
"vbroadcastss %%xmm2,%%ymm2 \n"
"mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164
"vmovd %%eax,%%xmm3 \n"
"vbroadcastss %%xmm3,%%ymm3 \n"
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 0xff000000
"vpslld $0x18,%%ymm4,%%ymm4 \n"
LABELALIGN
@ -3156,8 +3153,8 @@ void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
"vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
"vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
"vpsrlw $0x6,%%ymm0,%%ymm0 \n"
"vpaddsw %%ymm3,%%ymm0,%%ymm0 \n"
"vpsraw $0x6,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
"vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
"vpermq $0xd8,%%ymm1,%%ymm1 \n"
@ -3167,15 +3164,15 @@ void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
"vpor %%ymm4,%%ymm1,%%ymm1 \n"
"vmovdqu %%ymm0,(%1) \n"
"vmovdqu %%ymm1,0x20(%1) \n"
"lea 0x40(%1),%1 \n"
"lea 0x40(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(y_buf), // %0
"+r"(dst_argb), // %1
"+rm"(width) // %2
:
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
: "+r"(y_buf), // %0
"+r"(dst_argb), // %1
"+rm"(width) // %2
: "r"(yuvconstants) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif // HAS_I400TOARGBROW_AVX2

View File

@ -4781,7 +4781,9 @@ void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
: "memory");
}
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
// TODO - respect YuvConstants
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf,
const struct YuvConstants*, int width) {
uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
const uint64_t mask0 = 0x0;
const uint64_t mask1 = 0x55;

View File

@ -2735,7 +2735,11 @@ void I444ToARGBRow_MSA(const uint8_t* src_y,
}
}
void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// TODO - respect YuvConstants
void I400ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants*,
int width) {
int x;
v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3;
v8i16 vec0, vec1;

View File

@ -344,7 +344,10 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
"q12", "q13", "q14", "q15");
}
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
void I400ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
@ -355,10 +358,10 @@ void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB] "r"(&kYuvI601Constants.kUVToRB),
[kUVToG] "r"(&kYuvI601Constants.kUVToG),
[kUVBiasBGR] "r"(&kYuvI601Constants.kUVBiasBGR),
[kYToRgb] "r"(&kYuvI601Constants.kYToRgb)
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
}

View File

@ -397,7 +397,10 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
);
}
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
void I400ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP
"movi v23.8b, #255 \n"
@ -411,10 +414,10 @@ void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
[kUVToG]"r"(&kYuvI601Constants.kUVToG),
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);

View File

@ -2900,10 +2900,12 @@ __declspec(naked) void I422ToRGBARow_SSSE3(
}
#endif // HAS_I422TOARGBROW_SSSE3
// I400ToARGBRow_SSE2 is disabled due to new yuvconstant parameter
#ifdef HAS_I400TOARGBROW_SSE2
// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
uint8_t* rgb_buf,
const struct YuvConstants*,
int width) {
__asm {
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
@ -2951,6 +2953,7 @@ __declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
// note: vpunpcklbw mutates and vpackuswb unmutates.
__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* rgb_buf,
const struct YuvConstants*,
int width) {
__asm {
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)

View File

@ -3140,4 +3140,66 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
free_aligned_buffer_page_end(dest_rgb24);
}
// Test I400 with jpeg matrix is same as J400.
// Feeds a 0..255 grey ramp through J400ToARGB (full-range identity) and
// I400ToARGB / I400ToARGBMatrix with several color matrices, then
// spot-checks representative luma values and finally requires the JPEG
// (full-range) matrix output to match J400 byte for byte.
TEST_F(LibYUVConvertTest, TestI400) {
  const int kSize = 256;
  align_buffer_page_end(orig_i400, kSize);
  align_buffer_page_end(argb_pixels_i400, kSize * 4);
  align_buffer_page_end(argb_pixels_j400, kSize * 4);
  align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4);
  align_buffer_page_end(argb_pixels_h709_i400, kSize * 4);
  align_buffer_page_end(argb_pixels_2020_i400, kSize * 4);
  // Test grey scale: one row where pixel i has luma i.
  for (int i = 0; i < kSize; ++i) {
    orig_i400[i] = i;
  }
  J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1);
  I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1);
  I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants,
                   kSize, 1);
  I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants,
                   kSize, 1);
  I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants,
                   kSize, 1);
  // Y = 0: black (blue channel 0) in every matrix.
  EXPECT_EQ(0, argb_pixels_i400[0]);
  EXPECT_EQ(0, argb_pixels_j400[0]);
  EXPECT_EQ(0, argb_pixels_jpeg_i400[0]);
  EXPECT_EQ(0, argb_pixels_h709_i400[0]);
  EXPECT_EQ(0, argb_pixels_2020_i400[0]);
  // Y = 16: limited-range matrices (I601/H709/2020) treat 16 as black,
  // while full-range paths (J400, JPEG matrix) keep the value 16.
  EXPECT_EQ(0, argb_pixels_i400[16 * 4]);
  EXPECT_EQ(16, argb_pixels_j400[16 * 4]);
  EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
  EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]);
  EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]);
  // Y = 128: limited-range expansion scales mid grey up slightly (to 130).
  EXPECT_EQ(130, argb_pixels_i400[128 * 4]);
  EXPECT_EQ(128, argb_pixels_j400[128 * 4]);
  EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
  EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]);
  EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]);
  // Y = 255: saturates to peak white everywhere.
  EXPECT_EQ(255, argb_pixels_i400[255 * 4]);
  EXPECT_EQ(255, argb_pixels_j400[255 * 4]);
  EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
  EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]);
  EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]);
  // J400 must be an exact identity (B = G = R = Y, A = 255), and the JPEG
  // matrix output must equal J400 for every byte.
  for (int i = 0; i < kSize * 4; ++i) {
    if ((i & 3) == 3) {
      EXPECT_EQ(255, argb_pixels_j400[i]);
    } else {
      EXPECT_EQ(i / 4, argb_pixels_j400[i]);
    }
    EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
  }
  free_aligned_buffer_page_end(orig_i400);
  free_aligned_buffer_page_end(argb_pixels_i400);
  free_aligned_buffer_page_end(argb_pixels_j400);
  free_aligned_buffer_page_end(argb_pixels_jpeg_i400);
  free_aligned_buffer_page_end(argb_pixels_h709_i400);
  free_aligned_buffer_page_end(argb_pixels_2020_i400);
}
} // namespace libyuv