mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
I400ToARGBMatrix Pass a color matrix to use different coefficients
32 bit Neon: I400ToARGB_Opt (1937 ms)
64 bit: C I400ToARGB_Opt (8957 ms), NEON I400ToARGB_Opt (2147 ms)
x86: C I400ToARGB_Opt (1110 ms), AVX2 I400ToARGB_Opt (213 ms), SSE2 I400ToARGB_Opt (225 ms)
Bug: libyuv:861, b/156642185
Change-Id: I96b6f4ebba6ff9c4ed8803291ce098de6f93fa4f
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2209718
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
d426247a3b
commit
da41bca02b
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1755
|
||||
Version: 1756
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -1548,6 +1548,16 @@ int I420ToAR30Matrix(const uint8_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I400 (grey) to ARGB using the coefficients in yuvconstants. Reverse of ARGBToI400.
|
||||
LIBYUV_API
|
||||
int I400ToARGBMatrix(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
|
||||
// "sample_size" is needed to parse MJPG.
|
||||
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
|
||||
|
||||
@ -98,7 +98,6 @@ extern "C" {
|
||||
#define HAS_COPYROW_SSE2
|
||||
#define HAS_H422TOARGBROW_SSSE3
|
||||
#define HAS_HALFFLOATROW_SSE2
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
@ -201,7 +200,6 @@ extern "C" {
|
||||
#define HAS_H422TOARGBROW_AVX2
|
||||
#define HAS_HALFFLOATROW_AVX2
|
||||
// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#define HAS_I422TOARGB1555ROW_AVX2
|
||||
#define HAS_I422TOARGB4444ROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
@ -275,6 +273,7 @@ extern "C" {
|
||||
#define HAS_HALFMERGEUVROW_SSSE3
|
||||
#define HAS_I210TOAR30ROW_SSSE3
|
||||
#define HAS_I210TOARGBROW_SSSE3
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I422TOAR30ROW_SSSE3
|
||||
#define HAS_MERGERGBROW_SSSE3
|
||||
#define HAS_MIRRORUVROW_AVX2
|
||||
@ -303,6 +302,7 @@ extern "C" {
|
||||
#define HAS_HALFMERGEUVROW_AVX2
|
||||
#define HAS_I210TOAR30ROW_AVX2
|
||||
#define HAS_I210TOARGBROW_AVX2
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#define HAS_I422TOAR30ROW_AVX2
|
||||
#define HAS_I422TOUYVYROW_AVX2
|
||||
#define HAS_I422TOYUY2ROW_AVX2
|
||||
@ -693,6 +693,7 @@ struct YuvConstants {
|
||||
int16_t kUVBiasG[16];
|
||||
int16_t kUVBiasR[16];
|
||||
int16_t kYToRgb[16];
|
||||
int16_t kYBiasToRgb[16];
|
||||
};
|
||||
|
||||
// Offsets into YuvConstants structure
|
||||
@ -703,6 +704,8 @@ struct YuvConstants {
|
||||
#define KUVBIASG 128
|
||||
#define KUVBIASR 160
|
||||
#define KYTORGB 192
|
||||
#define KYBIASTORGB 224
|
||||
|
||||
#endif
|
||||
|
||||
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
|
||||
@ -2796,23 +2799,50 @@ void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width);
|
||||
void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
|
||||
void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
|
||||
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||
void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||
void I400ToARGBRow_C(const uint8_t* src_y,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_SSE2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_NEON(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_MSA(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_MMI(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
// ARGB preattenuated alpha blend.
|
||||
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1755
|
||||
#define LIBYUV_VERSION 1756
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -1559,16 +1559,18 @@ int I420AlphaToABGR(const uint8_t* src_y,
|
||||
width, height, attenuate);
|
||||
}
|
||||
|
||||
// Convert I400 to ARGB.
|
||||
// Convert I400 to ARGB with matrix.
|
||||
LIBYUV_API
|
||||
int I400ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int I400ToARGBMatrix(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) =
|
||||
void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I400ToARGBRow_C;
|
||||
if (!src_y || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1627,13 +1629,25 @@ int I400ToARGB(const uint8_t* src_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I400ToARGBRow(src_y, dst_argb, width);
|
||||
I400ToARGBRow(src_y, dst_argb, yuvconstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I400 to ARGB using kYuvI601Constants.
|
||||
LIBYUV_API
|
||||
int I400ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
return I400ToARGBMatrix(src_y, src_stride_y, dst_argb, dst_stride_argb,
|
||||
&kYuvI601Constants, width, height);
|
||||
}
|
||||
|
||||
// Convert J400 to ARGB.
|
||||
LIBYUV_API
|
||||
int J400ToARGB(const uint8_t* src_y,
|
||||
|
||||
@ -547,12 +547,6 @@ ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
|
||||
#if defined(HAS_J400TOARGBROW_AVX2)
|
||||
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_SSE2)
|
||||
ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_AVX2)
|
||||
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOARGBROW_SSSE3)
|
||||
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
|
||||
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
|
||||
@ -582,7 +576,6 @@ ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_MSA)
|
||||
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
|
||||
@ -591,7 +584,6 @@ ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
|
||||
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
|
||||
ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_MMI)
|
||||
ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3)
|
||||
@ -600,7 +592,6 @@ ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3)
|
||||
ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3)
|
||||
ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3)
|
||||
ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3)
|
||||
ANY11(I400ToARGBRow_Any_MMI, I400ToARGBRow_MMI, 0, 1, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_RAWTORGB24ROW_NEON)
|
||||
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
|
||||
@ -920,6 +911,47 @@ ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7)
|
||||
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
|
||||
}
|
||||
|
||||
#if defined(HAS_I400TOARGBROW_SSE2)
|
||||
ANY11P(I400ToARGBRow_Any_SSE2,
|
||||
I400ToARGBRow_SSE2,
|
||||
const struct YuvConstants*,
|
||||
1,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_AVX2)
|
||||
ANY11P(I400ToARGBRow_Any_AVX2,
|
||||
I400ToARGBRow_AVX2,
|
||||
const struct YuvConstants*,
|
||||
1,
|
||||
4,
|
||||
15)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_NEON)
|
||||
ANY11P(I400ToARGBRow_Any_NEON,
|
||||
I400ToARGBRow_NEON,
|
||||
const struct YuvConstants*,
|
||||
1,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_MSA)
|
||||
ANY11P(I400ToARGBRow_Any_MSA,
|
||||
I400ToARGBRow_MSA,
|
||||
const struct YuvConstants*,
|
||||
1,
|
||||
4,
|
||||
15)
|
||||
#endif
|
||||
#if defined(HAS_I400TOARGBROW_MMI)
|
||||
ANY11P(I400ToARGBRow_Any_MMI,
|
||||
I400ToARGBRow_MMI,
|
||||
const struct YuvConstants*,
|
||||
1,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
|
||||
ARGBToRGB565DitherRow_SSE2,
|
||||
|
||||
@ -1353,26 +1353,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
|
||||
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#elif defined(__arm__) // 32 bit arm
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
|
||||
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
|
||||
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#else
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
|
||||
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||
@ -1384,7 +1384,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
|
||||
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
|
||||
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
|
||||
@ -1395,7 +1397,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
#endif
|
||||
|
||||
#undef BB
|
||||
@ -1434,26 +1438,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
|
||||
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#elif defined(__arm__)
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
|
||||
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
|
||||
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#else
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
|
||||
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||
@ -1465,7 +1469,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
|
||||
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
|
||||
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
|
||||
@ -1476,7 +1482,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
#endif
|
||||
|
||||
#undef BB
|
||||
@ -1517,26 +1525,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
|
||||
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#elif defined(__arm__)
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
|
||||
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
|
||||
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#else
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
|
||||
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||
@ -1548,7 +1556,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
|
||||
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
|
||||
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
|
||||
@ -1559,7 +1569,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
#endif
|
||||
|
||||
#undef BB
|
||||
@ -1598,26 +1610,26 @@ const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
|
||||
{-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{UG, VG, UG, VG, UG, VG, UG, VG},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{VG, UG, VG, UG, VG, UG, VG, UG},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#elif defined(__arm__)
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
|
||||
{-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BB, BG, BR, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BB, BG, BR, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
|
||||
{-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{BR, BG, BB, 0, 0, 0, 0, 0},
|
||||
{0x0101 * YG, 0, 0, 0}};
|
||||
{BR, BG, BB, YGB, 0, 0, 0, 0},
|
||||
{0x0101 * YG, YG, 0, 0}};
|
||||
#else
|
||||
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
|
||||
{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||
@ -1629,7 +1641,9 @@ const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
|
||||
{VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
|
||||
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
|
||||
@ -1640,7 +1654,9 @@ const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
|
||||
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
|
||||
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
|
||||
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
|
||||
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
|
||||
{YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
|
||||
YGB}};
|
||||
#endif
|
||||
|
||||
#undef BB
|
||||
@ -1655,7 +1671,6 @@ const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
|
||||
|
||||
// C reference code that mimics the YUV assembly.
|
||||
// Reads 8 bit YUV and leaves result as 16 bit.
|
||||
|
||||
static __inline void YuvPixel(uint8_t y,
|
||||
uint8_t u,
|
||||
uint8_t v,
|
||||
@ -1671,7 +1686,7 @@ static __inline void YuvPixel(uint8_t y,
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#elif defined(__arm__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
@ -1680,7 +1695,7 @@ static __inline void YuvPixel(uint8_t y,
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#else
|
||||
int ub = yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
@ -1714,7 +1729,7 @@ static __inline void YuvPixel8_16(uint8_t y,
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#elif defined(__arm__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
@ -1723,7 +1738,7 @@ static __inline void YuvPixel8_16(uint8_t y,
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#else
|
||||
int ub = yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
@ -1758,7 +1773,7 @@ static __inline void YuvPixel16(int16_t y,
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#elif defined(__arm__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
@ -1767,7 +1782,7 @@ static __inline void YuvPixel16(int16_t y,
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#else
|
||||
int ub = yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
@ -1805,21 +1820,26 @@ static __inline void YuvPixel10(uint16_t y,
|
||||
*r = Clamp(r16 >> 6);
|
||||
}
|
||||
|
||||
// Y contribution to R,G,B. Scale and bias.
|
||||
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
|
||||
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
|
||||
|
||||
// C reference code that mimics the YUV assembly.
|
||||
static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) {
|
||||
uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16;
|
||||
*b = Clamp((int32_t)(y1 + YGB) >> 6);
|
||||
*g = Clamp((int32_t)(y1 + YGB) >> 6);
|
||||
*r = Clamp((int32_t)(y1 + YGB) >> 6);
|
||||
// Reads 8 bit Y, scales and biases it per yuvconstants, and writes the same 8 bit value to B, G and R.
|
||||
static __inline void YPixel(uint8_t y,
|
||||
uint8_t* b,
|
||||
uint8_t* g,
|
||||
uint8_t* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
int ygb = yuvconstants->kUVBiasBGR[3];
|
||||
int yg = yuvconstants->kYToRgb[1];
|
||||
#else
|
||||
int ygb = yuvconstants->kYBiasToRgb[0];
|
||||
int yg = yuvconstants->kYToRgb[0];
|
||||
#endif
|
||||
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
|
||||
*b = Clamp(((int32_t)(y1) + ygb) >> 6);
|
||||
*g = Clamp(((int32_t)(y1) + ygb) >> 6);
|
||||
*r = Clamp(((int32_t)(y1) + ygb) >> 6);
|
||||
}
|
||||
|
||||
#undef YG
|
||||
#undef YGB
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
|
||||
// C mimic assembly.
|
||||
@ -2353,18 +2373,21 @@ void I422ToRGBARow_C(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
|
||||
void I400ToARGBRow_C(const uint8_t* src_y,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
|
||||
YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
|
||||
YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
|
||||
YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3089,16 +3089,14 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
|
||||
#endif // HAS_UYVYTOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I400TOARGBROW_SSE2
|
||||
void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
|
||||
void I400ToARGBRow_SSE2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
|
||||
"movd %%eax,%%xmm2 \n"
|
||||
"pshufd $0x0,%%xmm2,%%xmm2 \n"
|
||||
"mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 *
|
||||
// 16
|
||||
"movd %%eax,%%xmm3 \n"
|
||||
"pshufd $0x0,%%xmm3,%%xmm3 \n"
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||
"movdqa 192(%3),%%xmm2 \n" // yg = 18997 = 1.164
|
||||
"movdqa 224(%3),%%xmm3 \n" // ygb = 1160 = 1.164 * 16
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n" // 0xff000000
|
||||
"pslld $0x18,%%xmm4 \n"
|
||||
|
||||
LABELALIGN
|
||||
@ -3108,8 +3106,8 @@ void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
|
||||
"lea 0x8(%0),%0 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"psubusw %%xmm3,%%xmm0 \n"
|
||||
"psrlw $6, %%xmm0 \n"
|
||||
"paddsw %%xmm3,%%xmm0 \n"
|
||||
"psraw $6, %%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
|
||||
// Step 2: Weave into ARGB
|
||||
@ -3125,27 +3123,26 @@ void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
|
||||
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(y_buf), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+rm"(width) // %2
|
||||
:
|
||||
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
|
||||
: "+r"(y_buf), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+rm"(width) // %2
|
||||
: "r"(yuvconstants) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
|
||||
}
|
||||
#endif // HAS_I400TOARGBROW_SSE2
|
||||
|
||||
#ifdef HAS_I400TOARGBROW_AVX2
|
||||
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
|
||||
// note: vpunpcklbw mutates and vpackuswb unmutates.
|
||||
void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
|
||||
void I400ToARGBRow_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 *
|
||||
"vmovdqa 192(%3),%%ymm2 \n" // yg = 18997 = 1.164
|
||||
"vmovdqa 224(%3),%%ymm3 \n" // ygb = -1160 = 1.164 *
|
||||
// 16
|
||||
"vmovd %%eax,%%xmm2 \n"
|
||||
"vbroadcastss %%xmm2,%%ymm2 \n"
|
||||
"mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164
|
||||
"vmovd %%eax,%%xmm3 \n"
|
||||
"vbroadcastss %%xmm3,%%ymm3 \n"
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 0xff000000
|
||||
"vpslld $0x18,%%ymm4,%%ymm4 \n"
|
||||
|
||||
LABELALIGN
|
||||
@ -3156,8 +3153,8 @@ void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
|
||||
"vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x6,%%ymm0,%%ymm0 \n"
|
||||
"vpaddsw %%ymm3,%%ymm0,%%ymm0 \n"
|
||||
"vpsraw $0x6,%%ymm0,%%ymm0 \n"
|
||||
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
|
||||
"vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
|
||||
"vpermq $0xd8,%%ymm1,%%ymm1 \n"
|
||||
@ -3167,15 +3164,15 @@ void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
|
||||
"vpor %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"
|
||||
"vmovdqu %%ymm1,0x20(%1) \n"
|
||||
"lea 0x40(%1),%1 \n"
|
||||
"lea 0x40(%1),%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(y_buf), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+rm"(width) // %2
|
||||
:
|
||||
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
|
||||
: "+r"(y_buf), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+rm"(width) // %2
|
||||
: "r"(yuvconstants) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
|
||||
}
|
||||
#endif // HAS_I400TOARGBROW_AVX2
|
||||
|
||||
|
||||
@ -4781,7 +4781,9 @@ void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
: "memory");
|
||||
}
|
||||
|
||||
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
|
||||
// TODO - respect YuvConstants
|
||||
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf,
|
||||
const struct YuvConstants*, int width) {
|
||||
uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
|
||||
const uint64_t mask0 = 0x0;
|
||||
const uint64_t mask1 = 0x55;
|
||||
|
||||
@ -2735,7 +2735,11 @@ void I444ToARGBRow_MSA(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
// TODO - respect YuvConstants
|
||||
void I400ToARGBRow_MSA(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants*,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3;
|
||||
v8i16 vec0, vec1;
|
||||
|
||||
@ -344,7 +344,10 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
|
||||
"q12", "q13", "q14", "q15");
|
||||
}
|
||||
|
||||
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
void I400ToARGBRow_NEON(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile(
|
||||
YUVTORGB_SETUP
|
||||
"vmov.u8 d23, #255 \n"
|
||||
@ -355,10 +358,10 @@ void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: [kUVToRB] "r"(&kYuvI601Constants.kUVToRB),
|
||||
[kUVToG] "r"(&kYuvI601Constants.kUVToG),
|
||||
[kUVBiasBGR] "r"(&kYuvI601Constants.kUVBiasBGR),
|
||||
[kYToRgb] "r"(&kYuvI601Constants.kYToRgb)
|
||||
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG] "r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb] "r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
|
||||
"q12", "q13", "q14", "q15");
|
||||
}
|
||||
|
||||
@ -397,7 +397,10 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
void I400ToARGBRow_NEON(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"movi v23.8b, #255 \n"
|
||||
@ -411,10 +414,10 @@ void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
|
||||
[kUVToG]"r"(&kYuvI601Constants.kUVToG),
|
||||
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
|
||||
@ -2900,10 +2900,12 @@ __declspec(naked) void I422ToRGBARow_SSSE3(
|
||||
}
|
||||
#endif // HAS_I422TOARGBROW_SSSE3
|
||||
|
||||
// I400ToARGBRow_SSE2 is disabled due to the new yuvconstants parameter.
|
||||
#ifdef HAS_I400TOARGBROW_SSE2
|
||||
// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
|
||||
__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants*,
|
||||
int width) {
|
||||
__asm {
|
||||
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
|
||||
@ -2951,6 +2953,7 @@ __declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
|
||||
// note: vpunpcklbw mutates and vpackuswb unmutates.
|
||||
__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* rgb_buf,
|
||||
const struct YuvConstants*,
|
||||
int width) {
|
||||
__asm {
|
||||
mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
|
||||
|
||||
@ -3140,4 +3140,66 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
|
||||
free_aligned_buffer_page_end(dest_rgb24);
|
||||
}
|
||||
|
||||
// Test that I400ToARGBMatrix with the JPEG matrix produces the same output as
// J400ToARGB, and spot-check I400ToARGB and I400ToARGBMatrix with the H709 and
// 2020 matrices on a 0..255 grey ramp.
|
||||
TEST_F(LibYUVConvertTest, TestI400) {
|
||||
// One source byte per grey value 0..255; each ARGB output uses 4 bytes per
// pixel.
const int kSize = 256;
|
||||
align_buffer_page_end(orig_i400, kSize);
|
||||
align_buffer_page_end(argb_pixels_i400, kSize * 4);
|
||||
align_buffer_page_end(argb_pixels_j400, kSize * 4);
|
||||
align_buffer_page_end(argb_pixels_jpeg_i400, kSize * 4);
|
||||
align_buffer_page_end(argb_pixels_h709_i400, kSize * 4);
|
||||
align_buffer_page_end(argb_pixels_2020_i400, kSize * 4);
|
||||
|
||||
// Fill the source plane with a grey ramp: pixel i has value i.
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
orig_i400[i] = i;
|
||||
}
|
||||
|
||||
// Convert the same single row (width kSize, height 1) five ways: the two
// fixed-matrix entry points, then I400ToARGBMatrix with the JPEG, H709 and
// 2020 constants.
J400ToARGB(orig_i400, 0, argb_pixels_j400, 0, kSize, 1);
|
||||
I400ToARGB(orig_i400, 0, argb_pixels_i400, 0, kSize, 1);
|
||||
I400ToARGBMatrix(orig_i400, 0, argb_pixels_jpeg_i400, 0, &kYuvJPEGConstants,
|
||||
kSize, 1);
|
||||
I400ToARGBMatrix(orig_i400, 0, argb_pixels_h709_i400, 0, &kYuvH709Constants,
|
||||
kSize, 1);
|
||||
I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants,
|
||||
kSize, 1);
|
||||
|
||||
// Spot checks on the first byte of pixels 0, 16, 128 and 255.
// Pixel 0: every conversion is expected to produce 0.
EXPECT_EQ(0, argb_pixels_i400[0]);
|
||||
EXPECT_EQ(0, argb_pixels_j400[0]);
|
||||
EXPECT_EQ(0, argb_pixels_jpeg_i400[0]);
|
||||
EXPECT_EQ(0, argb_pixels_h709_i400[0]);
|
||||
EXPECT_EQ(0, argb_pixels_2020_i400[0]);
|
||||
// Pixel 16: I400, H709 and 2020 are expected to give 0; J400 and the JPEG
// matrix are expected to pass 16 through unchanged.
EXPECT_EQ(0, argb_pixels_i400[16 * 4]);
|
||||
EXPECT_EQ(16, argb_pixels_j400[16 * 4]);
|
||||
EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
|
||||
EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]);
|
||||
EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]);
|
||||
// Pixel 128: I400, H709 and 2020 are expected to give 130; J400 and the
// JPEG matrix are expected to give 128.
EXPECT_EQ(130, argb_pixels_i400[128 * 4]);
|
||||
EXPECT_EQ(128, argb_pixels_j400[128 * 4]);
|
||||
EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
|
||||
EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]);
|
||||
EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]);
|
||||
// Pixel 255: every conversion is expected to produce 255.
EXPECT_EQ(255, argb_pixels_i400[255 * 4]);
|
||||
EXPECT_EQ(255, argb_pixels_j400[255 * 4]);
|
||||
EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
|
||||
EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]);
|
||||
EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]);
|
||||
|
||||
// Full check of the J400 output: the fourth byte of each pixel (presumably
// alpha) must be 255 and the other three bytes must equal the source grey
// value i / 4. The JPEG-matrix output must match J400 byte for byte.
|
||||
for (int i = 0; i < kSize * 4; ++i) {
|
||||
if ((i & 3) == 3) {
|
||||
EXPECT_EQ(255, argb_pixels_j400[i]);
|
||||
} else {
|
||||
EXPECT_EQ(i / 4, argb_pixels_j400[i]);
|
||||
}
|
||||
EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
|
||||
}
|
||||
|
||||
// Release every buffer allocated at the top of the test.
|
||||
free_aligned_buffer_page_end(orig_i400);
|
||||
free_aligned_buffer_page_end(argb_pixels_i400);
|
||||
free_aligned_buffer_page_end(argb_pixels_j400);
|
||||
free_aligned_buffer_page_end(argb_pixels_jpeg_i400);
|
||||
free_aligned_buffer_page_end(argb_pixels_h709_i400);
|
||||
free_aligned_buffer_page_end(argb_pixels_2020_i400);
|
||||
}
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user