mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-04-30 19:09:18 +08:00
Unify X86/X64 versions of ARGBToI4xxMatrix functions
Change-Id: Iead13414414543e5f10ba9ba47a6ceaeb3113dee
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7562443
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
f69a479f04
commit
b1cacfb38f
@ -23,7 +23,7 @@ extern "C" {
|
||||
// (bgra in memory) you'll want to use the kArgb* constants. Alternatively, if
|
||||
// your input is ABGR little endian (rgba in memory) you'd use the kAbgr* ones.
|
||||
//
|
||||
// Conversion matrix for RGB to YUV.
|
||||
// Conversion matrix for xRGB to YUV.
|
||||
LIBYUV_API extern const struct ArgbConstants kArgbI601Constants; // BT.601
|
||||
LIBYUV_API extern const struct ArgbConstants kArgbJPEGConstants; // BT.601 full
|
||||
LIBYUV_API extern const struct ArgbConstants kArgbH709Constants; // BT.709
|
||||
@ -32,7 +32,7 @@ LIBYUV_API extern const struct ArgbConstants kArgbU2020Constants; // BT.2020
|
||||
LIBYUV_API extern const struct ArgbConstants
|
||||
kArgbV2020Constants; // BT.2020 full
|
||||
|
||||
// Conversion matrix for BGR to YUV.
|
||||
// Conversion matrix for xBGR to YUV.
|
||||
LIBYUV_API extern const struct ArgbConstants kAbgrI601Constants; // BT.601
|
||||
LIBYUV_API extern const struct ArgbConstants kAbgrJPEGConstants; // BT.601 full
|
||||
LIBYUV_API extern const struct ArgbConstants kAbgrH709Constants; // BT.709
|
||||
@ -41,6 +41,24 @@ LIBYUV_API extern const struct ArgbConstants kAbgrU2020Constants; // BT.2020
|
||||
LIBYUV_API extern const struct ArgbConstants
|
||||
kAbgrV2020Constants; // BT.2020 full
|
||||
|
||||
// Conversion matrix for RGBx to YUV.
|
||||
LIBYUV_API extern const struct ArgbConstants kRgbaI601Constants; // BT.601
|
||||
LIBYUV_API extern const struct ArgbConstants kRgbaJPEGConstants; // BT.601 full
|
||||
LIBYUV_API extern const struct ArgbConstants kRgbaH709Constants; // BT.709
|
||||
LIBYUV_API extern const struct ArgbConstants kRgbaF709Constants; // BT.709 full
|
||||
LIBYUV_API extern const struct ArgbConstants kRgbaU2020Constants; // BT.2020
|
||||
LIBYUV_API extern const struct ArgbConstants
|
||||
kRgbaV2020Constants; // BT.2020 full
|
||||
|
||||
// Conversion matrix for BGRx to YUV.
|
||||
LIBYUV_API extern const struct ArgbConstants kBgraI601Constants; // BT.601
|
||||
LIBYUV_API extern const struct ArgbConstants kBgraJPEGConstants; // BT.601 full
|
||||
LIBYUV_API extern const struct ArgbConstants kBgraH709Constants; // BT.709
|
||||
LIBYUV_API extern const struct ArgbConstants kBgraF709Constants; // BT.709 full
|
||||
LIBYUV_API extern const struct ArgbConstants kBgraU2020Constants; // BT.2020
|
||||
LIBYUV_API extern const struct ArgbConstants
|
||||
kBgraV2020Constants; // BT.2020 full
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
#define ARGBToARGB ARGBCopy
|
||||
LIBYUV_API
|
||||
|
||||
@ -273,6 +273,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_BGRATOUVROW_SSSE3
|
||||
#define HAS_RGBATOUVROW_SSSE3
|
||||
#define HAS_ARGBTOUVMATRIXROW_SSSE3
|
||||
#define HAS_ARGBTOUV444MATRIXROW_SSSE3
|
||||
|
||||
#if defined(__x86_64__) || !defined(__pic__)
|
||||
// TODO(fbarchard): fix build error on android_full_debug=1
|
||||
@ -305,6 +307,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOUVJ444ROW_AVX2
|
||||
#define HAS_ARGBTOUVJROW_AVX2
|
||||
#define HAS_ARGBTOUVROW_AVX2
|
||||
#define HAS_ARGBTOUVMATRIXROW_AVX2
|
||||
#define HAS_ARGBTOUV444MATRIXROW_AVX2
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||
@ -973,8 +977,8 @@ typedef uint8_t ulvec8[32];
|
||||
|
||||
struct ArgbConstants {
|
||||
uint8_t kRGBToY[32];
|
||||
int16_t kRGBToU[16];
|
||||
int16_t kRGBToV[16];
|
||||
int8_t kRGBToU[32];
|
||||
int8_t kRGBToV[32];
|
||||
uint16_t kAddY[16];
|
||||
uint16_t kAddUV[16];
|
||||
};
|
||||
@ -2092,11 +2096,60 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUV444MatrixRow_C(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUVMatrixRow_Any_SSSE3(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUVMatrixRow_Any_AVX2(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUV444MatrixRow_Any_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUV444MatrixRow_Any_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
void ARGBToUV444MatrixRow_Any_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct ArgbConstants* c);
|
||||
|
||||
void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
|
||||
@ -2163,6 +2163,22 @@ int ARGBToI420Matrix(const uint8_t* src_argb,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||
const struct ArgbConstants* c) =
|
||||
ARGBToUVMatrixRow_C;
|
||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
|
||||
@ -184,6 +184,22 @@ int ARGBToI444Matrix(const uint8_t* src_argb,
|
||||
uint8_t* dst_v, int width,
|
||||
const struct ArgbConstants* c) =
|
||||
ARGBToUV444MatrixRow_C;
|
||||
#if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUV444MATRIXROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
@ -384,6 +400,22 @@ int ARGBToI422Matrix(const uint8_t* src_argb,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||
const struct ArgbConstants* c) =
|
||||
ARGBToUVMatrixRow_C;
|
||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
@ -630,6 +662,22 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||
const struct ArgbConstants* c) =
|
||||
ARGBToUVMatrixRow_C;
|
||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
|
||||
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||
if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 ||
|
||||
|
||||
@ -2198,6 +2198,62 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
|
||||
memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \
|
||||
}
|
||||
|
||||
#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \
|
||||
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
|
||||
int width, const struct ArgbConstants* c) { \
|
||||
SIMD_ALIGNED(uint8_t vin[128]); \
|
||||
SIMD_ALIGNED(uint8_t vout[128 * 2]); \
|
||||
memset(vin, 0, sizeof(vin)); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \
|
||||
} \
|
||||
memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \
|
||||
ANY_SIMD(vin, vout, vout + 128, MASK + 1, c); \
|
||||
memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \
|
||||
memcpy(dst_v + (ptrdiff_t)n, vout + 128, (ptrdiff_t)r); \
|
||||
}
|
||||
|
||||
#define ANY12MS(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
|
||||
uint8_t* dst_v, int width, const struct ArgbConstants* c) { \
|
||||
SIMD_ALIGNED(uint8_t vin[128 * 2]); \
|
||||
SIMD_ALIGNED(uint8_t vout[128 * 2]); \
|
||||
memset(vin, 0, sizeof(vin)); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n, c); \
|
||||
} \
|
||||
ptrdiff_t np = n; \
|
||||
memcpy(vin, src_ptr + (np >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||
memcpy(vin + 128, src_ptr + src_stride + (np >> UVSHIFT) * BPP, \
|
||||
SS(r, UVSHIFT) * BPP); \
|
||||
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
|
||||
memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
|
||||
BPP); \
|
||||
memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
|
||||
vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
|
||||
} \
|
||||
ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1, c); \
|
||||
memcpy(dst_u + (np >> 1), vout, SS(r, 1)); \
|
||||
memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOUVMATRIXROW_AVX2
|
||||
ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVMATRIXROW_SSSE3
|
||||
ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUV444MATRIXROW_AVX2
|
||||
ANY12M(ARGBToUV444MatrixRow_Any_AVX2, ARGBToUV444MatrixRow_AVX2, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUV444MATRIXROW_SSSE3
|
||||
ANY12M(ARGBToUV444MatrixRow_Any_SSSE3, ARGBToUV444MatrixRow_SSSE3, 4, 15)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
|
||||
#endif
|
||||
|
||||
@ -769,16 +769,16 @@ static __inline uint8_t RGBToUMatrix(uint8_t r,
|
||||
uint8_t g,
|
||||
uint8_t b,
|
||||
const struct ArgbConstants* c) {
|
||||
return (c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b +
|
||||
c->kAddUV[0]) >>
|
||||
return (c->kAddUV[0] -
|
||||
(c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b)) >>
|
||||
8;
|
||||
}
|
||||
static __inline uint8_t RGBToVMatrix(uint8_t r,
|
||||
uint8_t g,
|
||||
uint8_t b,
|
||||
const struct ArgbConstants* c) {
|
||||
return (c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b +
|
||||
c->kAddUV[0]) >>
|
||||
return (c->kAddUV[0] -
|
||||
(c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b)) >>
|
||||
8;
|
||||
}
|
||||
|
||||
@ -1486,13 +1486,16 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
|
||||
#endif
|
||||
|
||||
#define ARGBCONSTANTSBODY(RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
|
||||
{{BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, \
|
||||
BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0}, \
|
||||
{BU, GU, RU, 0, BU, GU, RU, 0, BU, GU, RU, 0, BU, GU, RU, 0}, \
|
||||
{BV, GV, RV, 0, BV, GV, RV, 0, BV, GV, RV, 0, BV, GV, RV, 0}, \
|
||||
{AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY}, \
|
||||
{AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, \
|
||||
#define ARGBCONSTANTSBODY(Y0, Y1, Y2, Y3, U0, U1, U2, U3, V0, V1, V2, V3, AY, \
|
||||
AUV) \
|
||||
{{Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, \
|
||||
Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3}, \
|
||||
{U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, \
|
||||
U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3}, \
|
||||
{V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, \
|
||||
V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3}, \
|
||||
{AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY}, \
|
||||
{AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, \
|
||||
AUV, AUV}}
|
||||
|
||||
// clang-format on
|
||||
@ -1503,11 +1506,19 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
|
||||
YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
|
||||
|
||||
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
|
||||
const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
|
||||
ARGBCONSTANTSBODY(RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV); \
|
||||
const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
|
||||
ARGBCONSTANTSBODY(BY, GY, RY, BU, GU, RU, BV, GV, RV, AY, AUV);
|
||||
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
|
||||
const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
|
||||
ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \
|
||||
-(RV), 0, AY, AUV); \
|
||||
const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
|
||||
ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \
|
||||
-(BV), 0, AY, AUV); \
|
||||
const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \
|
||||
ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \
|
||||
-(GV), -(RV), AY, AUV); \
|
||||
const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \
|
||||
ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \
|
||||
-(GV), -(BV), AY, AUV);
|
||||
|
||||
// BT.601 limited range RGB to YUV coefficients
|
||||
// RY = round(0.299 * 219 / 255 * 256) = 66
|
||||
|
||||
@ -9,6 +9,8 @@
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/convert_from_argb.h" // For ArgbConstants
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
@ -1580,26 +1582,16 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
|
||||
#ifdef HAS_ARGBTOUV444ROW_SSSE3
|
||||
|
||||
// Coefficients expressed as negatives to allow 128
|
||||
struct RgbUVConstants {
|
||||
vec8 kRGBToU;
|
||||
vec8 kRGBToV;
|
||||
};
|
||||
|
||||
// Offsets into RgbUVConstants structure
|
||||
#define KRGBTOU 0
|
||||
#define KRGBTOV 16
|
||||
|
||||
void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
const struct ArgbConstants* c) {
|
||||
asm volatile(
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n" // 0x8000
|
||||
"psllw $15,%%xmm5 \n"
|
||||
"movdqa 0x0(%4),%%xmm3 \n" // kRGBToU
|
||||
"movdqa 0x10(%4),%%xmm4 \n" // kRGBToV
|
||||
"movdqa 0x20(%4),%%xmm3 \n" // kRGBToU
|
||||
"movdqa 0x40(%4),%%xmm4 \n" // kRGBToV
|
||||
"sub %1,%2 \n"
|
||||
|
||||
LABELALIGN
|
||||
@ -1654,7 +1646,7 @@ void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
#else
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"(rgbuvconstants) // %4
|
||||
: "r"(c) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||
}
|
||||
#endif // HAS_ARGBTOUV444ROW_SSSE3
|
||||
@ -1665,10 +1657,10 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
const struct ArgbConstants* c) {
|
||||
asm volatile(
|
||||
"vbroadcastf128 0x0(%4),%%ymm3 \n" // kRGBToU
|
||||
"vbroadcastf128 0x10(%4),%%ymm4 \n" // kRGBToV
|
||||
"vmovdqa 0x20(%4),%%ymm3 \n" // kRGBToU
|
||||
"vmovdqa 0x40(%4),%%ymm4 \n" // kRGBToV
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0x8000
|
||||
"vpsllw $15,%%ymm5,%%ymm5 \n"
|
||||
"vmovdqa %5,%%ymm7 \n"
|
||||
@ -1724,7 +1716,7 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
||||
#else
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"(rgbuvconstants), // %4
|
||||
: "r"(c), // %4
|
||||
"m"(kPermdARGBToY_AVX) // %5
|
||||
: "memory", "cc", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6",
|
||||
"ymm7");
|
||||
@ -1746,10 +1738,10 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
const struct ArgbConstants* c) {
|
||||
asm volatile(
|
||||
"movdqa 0x0(%5),%%xmm4 \n" // RGBToU
|
||||
"movdqa 0x10(%5),%%xmm5 \n" // RGBToV
|
||||
"movdqa 0x20(%5),%%xmm4 \n" // RGBToU
|
||||
"movdqa 0x40(%5),%%xmm5 \n" // RGBToV
|
||||
"pcmpeqb %%xmm6,%%xmm6 \n" // 0x0101
|
||||
"pabsb %%xmm6,%%xmm6 \n"
|
||||
"movdqa %6,%%xmm7 \n" // kShuffleAARRGGBB
|
||||
@ -1803,7 +1795,7 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"r"(rgbuvconstants), // %5
|
||||
"r"(c), // %5
|
||||
"m"(kShuffleAARRGGBB) // %6
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
@ -1820,10 +1812,10 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
const struct ArgbConstants* c) {
|
||||
asm volatile(
|
||||
"vbroadcastf128 0(%5),%%ymm4 \n" // RGBToU
|
||||
"vbroadcastf128 0x10(%5),%%ymm5 \n" // RGBToV
|
||||
"vbroadcastf128 0x20(%5),%%ymm4 \n" // RGBToU
|
||||
"vbroadcastf128 0x40(%5),%%ymm5 \n" // RGBToV
|
||||
"vpcmpeqb %%ymm6,%%ymm6,%%ymm6 \n" // 0x0101
|
||||
"vpabsb %%ymm6,%%ymm6 \n"
|
||||
"vmovdqa %6,%%ymm7 \n" // kShuffleAARRGGBB
|
||||
@ -1878,15 +1870,13 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"r"(rgbuvconstants), // %5
|
||||
"r"(c), // %5
|
||||
"m"(kShuffleAARRGGBB) // %6
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOUVROW_AVX2
|
||||
|
||||
#if defined(HAS_ARGBTOUV444ROW_SSSE3) || defined(HAS_ARGBTOUVROW_AVX2)
|
||||
|
||||
// RGB to BT601 coefficients
|
||||
// UB 0.875 coefficient = 112
|
||||
// UG -0.5781 coefficient = -74
|
||||
@ -1895,30 +1885,13 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
// VG -0.7344 coefficient = -94
|
||||
// VR 0.875 coefficient = 112
|
||||
|
||||
static const struct RgbUVConstants kARGBI601UVConstants = {
|
||||
{-112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0},
|
||||
{18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0}};
|
||||
|
||||
static const struct RgbUVConstants kABGRI601UVConstants = {
|
||||
{38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0},
|
||||
{-112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0}};
|
||||
|
||||
static const struct RgbUVConstants kBGRAI601UVConstants = {
|
||||
{0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112},
|
||||
{0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18}};
|
||||
|
||||
static const struct RgbUVConstants kRGBAI601UVConstants = {
|
||||
{0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38},
|
||||
{0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112}};
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOUV444ROW_SSSE3
|
||||
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_SSSE3(src_argb, dst_u, dst_v, width,
|
||||
&kARGBI601UVConstants);
|
||||
&kArgbI601Constants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUV444ROW_SSSE3
|
||||
|
||||
@ -1927,8 +1900,7 @@ void ARGBToUV444Row_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width,
|
||||
&kARGBI601UVConstants);
|
||||
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, &kArgbI601Constants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUV444ROW_AVX2
|
||||
|
||||
@ -1939,7 +1911,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||
&kARGBI601UVConstants);
|
||||
&kArgbI601Constants);
|
||||
}
|
||||
|
||||
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
|
||||
@ -1948,7 +1920,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_SSSE3(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||
&kABGRI601UVConstants);
|
||||
&kAbgrI601Constants);
|
||||
}
|
||||
|
||||
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
|
||||
@ -1957,7 +1929,7 @@ void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_SSSE3(src_bgra, src_stride_bgra, dst_u, dst_v, width,
|
||||
&kBGRAI601UVConstants);
|
||||
&kBgraI601Constants);
|
||||
}
|
||||
|
||||
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
@ -1966,7 +1938,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_SSSE3(src_rgba, src_stride_rgba, dst_u, dst_v, width,
|
||||
&kRGBAI601UVConstants);
|
||||
&kRgbaI601Constants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
|
||||
@ -1977,7 +1949,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||
&kARGBI601UVConstants);
|
||||
&kArgbI601Constants);
|
||||
}
|
||||
|
||||
void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
|
||||
@ -1986,31 +1958,18 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||
&kABGRI601UVConstants);
|
||||
&kAbgrI601Constants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUVROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBTOUVJ444ROW_SSSE3
|
||||
// RGB to JPEG coefficients
|
||||
// UB 0.500 coefficient = 128
|
||||
// UG -0.33126 coefficient = -85
|
||||
// UR -0.16874 coefficient = -43
|
||||
// VB -0.08131 coefficient = -21
|
||||
// VG -0.41869 coefficient = -107
|
||||
// VR 0.500 coefficient = 128
|
||||
|
||||
static const struct RgbUVConstants kARGBJPEGUVConstants = {
|
||||
{-128, 85, 43, 0, -128, 85, 43, 0, -128, 85, 43, 0, -128, 85, 43, 0},
|
||||
{21, 107, -128, 0, 21, 107, -128, 0, 21, 107, -128, 0, 21, 107, -128, 0}};
|
||||
|
||||
void ARGBToUVJ444Row_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_SSSE3(src_argb, dst_u, dst_v, width,
|
||||
&kARGBJPEGUVConstants);
|
||||
&kArgbJPEGConstants);
|
||||
}
|
||||
|
||||
#endif // HAS_ARGBTOUVJ444ROW_SSSE3
|
||||
|
||||
#ifdef HAS_ARGBTOUVJ444ROW_AVX2
|
||||
@ -2018,15 +1977,10 @@ void ARGBToUVJ444Row_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width,
|
||||
&kARGBJPEGUVConstants);
|
||||
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUVJ444ROW_AVX2
|
||||
|
||||
static const struct RgbUVConstants kABGRJPEGUVConstants = {
|
||||
{43, 85, -128, 0, 43, 85, -128, 0, 43, 85, -128, 0, 43, 85, -128, 0},
|
||||
{-128, 107, 21, 0, -128, 107, 21, 0, -128, 107, 21, 0, -128, 107, 21, 0}};
|
||||
|
||||
#ifdef HAS_ARGBTOUVJROW_SSSE3
|
||||
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
@ -2034,7 +1988,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||
&kARGBJPEGUVConstants);
|
||||
&kArgbJPEGConstants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUVJROW_SSSE3
|
||||
|
||||
@ -2045,7 +1999,7 @@ void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_SSSE3(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||
&kABGRJPEGUVConstants);
|
||||
&kAbgrJPEGConstants);
|
||||
}
|
||||
#endif // HAS_ABGRTOUVJROW_SSSE3
|
||||
|
||||
@ -2056,7 +2010,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||
&kARGBJPEGUVConstants);
|
||||
&kArgbJPEGConstants);
|
||||
}
|
||||
#endif // HAS_ARGBTOUVJROW_AVX2
|
||||
|
||||
@ -2067,7 +2021,7 @@ void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||
&kABGRJPEGUVConstants);
|
||||
&kAbgrJPEGConstants);
|
||||
}
|
||||
#endif // HAS_ABGRTOUVJROW_AVX2
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user