Unify X86/X64 versions of ARGBToI4xxMatrix functions

Change-Id: Iead13414414543e5f10ba9ba47a6ceaeb3113dee
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7562443
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Dale Curtis 2026-03-18 22:21:53 +00:00 committed by libyuv LUCI CQ
parent f69a479f04
commit b1cacfb38f
7 changed files with 254 additions and 98 deletions

View File

@ -23,7 +23,7 @@ extern "C" {
// (bgra in memory) you'll want to use the kArgb* constants. Alternatively, if
// your input is ABGR little endian (rgba in memory) you'd use the kAbgr* ones.
//
// Conversion matrix for RGB to YUV.
// Conversion matrix for xRGB to YUV.
LIBYUV_API extern const struct ArgbConstants kArgbI601Constants; // BT.601
LIBYUV_API extern const struct ArgbConstants kArgbJPEGConstants; // BT.601 full
LIBYUV_API extern const struct ArgbConstants kArgbH709Constants; // BT.709
@ -32,7 +32,7 @@ LIBYUV_API extern const struct ArgbConstants kArgbU2020Constants; // BT.2020
LIBYUV_API extern const struct ArgbConstants
kArgbV2020Constants; // BT.2020 full
// Conversion matrix for BGR to YUV.
// Conversion matrix for xBGR to YUV.
LIBYUV_API extern const struct ArgbConstants kAbgrI601Constants; // BT.601
LIBYUV_API extern const struct ArgbConstants kAbgrJPEGConstants; // BT.601 full
LIBYUV_API extern const struct ArgbConstants kAbgrH709Constants; // BT.709
@ -41,6 +41,24 @@ LIBYUV_API extern const struct ArgbConstants kAbgrU2020Constants; // BT.2020
LIBYUV_API extern const struct ArgbConstants
kAbgrV2020Constants; // BT.2020 full
// Conversion matrix for RGBx to YUV.
LIBYUV_API extern const struct ArgbConstants kRgbaI601Constants; // BT.601
LIBYUV_API extern const struct ArgbConstants kRgbaJPEGConstants; // BT.601 full
LIBYUV_API extern const struct ArgbConstants kRgbaH709Constants; // BT.709
LIBYUV_API extern const struct ArgbConstants kRgbaF709Constants; // BT.709 full
LIBYUV_API extern const struct ArgbConstants kRgbaU2020Constants; // BT.2020
LIBYUV_API extern const struct ArgbConstants
kRgbaV2020Constants; // BT.2020 full
// Conversion matrix for BGRx to YUV.
LIBYUV_API extern const struct ArgbConstants kBgraI601Constants; // BT.601
LIBYUV_API extern const struct ArgbConstants kBgraJPEGConstants; // BT.601 full
LIBYUV_API extern const struct ArgbConstants kBgraH709Constants; // BT.709
LIBYUV_API extern const struct ArgbConstants kBgraF709Constants; // BT.709 full
LIBYUV_API extern const struct ArgbConstants kBgraU2020Constants; // BT.2020
LIBYUV_API extern const struct ArgbConstants
kBgraV2020Constants; // BT.2020 full
// Copy ARGB to ARGB.
#define ARGBToARGB ARGBCopy
LIBYUV_API

View File

@ -273,6 +273,8 @@ extern "C" {
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_RGBATOUVROW_SSSE3
#define HAS_ARGBTOUVMATRIXROW_SSSE3
#define HAS_ARGBTOUV444MATRIXROW_SSSE3
#if defined(__x86_64__) || !defined(__pic__)
// TODO(fbarchard): fix build error on android_full_debug=1
@ -305,6 +307,8 @@ extern "C" {
#define HAS_ARGBTOUVJ444ROW_AVX2
#define HAS_ARGBTOUVJROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOUVMATRIXROW_AVX2
#define HAS_ARGBTOUV444MATRIXROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
@ -973,8 +977,8 @@ typedef uint8_t ulvec8[32];
struct ArgbConstants {
uint8_t kRGBToY[32];
int16_t kRGBToU[16];
int16_t kRGBToV[16];
int8_t kRGBToU[32];
int8_t kRGBToV[32];
uint16_t kAddY[16];
uint16_t kAddUV[16];
};
@ -2092,11 +2096,60 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUV444MatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUVMatrixRow_Any_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUVMatrixRow_Any_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUV444MatrixRow_Any_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ARGBToUV444MatrixRow_Any_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c);
void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);

View File

@ -2163,6 +2163,22 @@ int ARGBToI420Matrix(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUVMatrixRow_C;
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
height == 0) {
return -1;

View File

@ -184,6 +184,22 @@ int ARGBToI444Matrix(const uint8_t* src_argb,
uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUV444MatrixRow_C;
#if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUV444MATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX2;
}
}
#endif
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
height == 0) {
return -1;
@ -384,6 +400,22 @@ int ARGBToI422Matrix(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUVMatrixRow_C;
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 ||
height == 0) {
return -1;
@ -630,6 +662,22 @@ int ARGBToNV12Matrix(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_v, int width,
const struct ArgbConstants* c) =
ARGBToUVMatrixRow_C;
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2;
}
}
#endif
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 ||

View File

@ -2198,6 +2198,62 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \
}
// Any-width wrapper for a non-subsampled (4:4:4) row function that writes
// both U and V planes. Runs ANY_SIMD over the largest multiple of (MASK + 1)
// pixels, then copies the r leftover pixels into a zeroed, SIMD-aligned
// scratch buffer, runs one more full-width pass on it, and copies only the
// r valid U/V outputs back. BPP is bytes per source pixel.
#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
int width, const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[128]); \
SIMD_ALIGNED(uint8_t vout[128 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \
} \
memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \
ANY_SIMD(vin, vout, vout + 128, MASK + 1, c); \
memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \
memcpy(dst_v + (ptrdiff_t)n, vout + 128, (ptrdiff_t)r); \
}
// Any-width wrapper for a two-row (strided), horizontally subsampled UV row
// function. Same scratch-buffer strategy as ANY12M, but copies both source
// rows into vin (second row at offset 128, passed as stride 128 to the SIMD
// pass). For odd widths with UVSHIFT == 0 the last pixel of each row is
// duplicated so the 2x1 subsample averages a valid pair. Each output plane
// receives SS(r, 1) (half-width, rounded up) remainder bytes.
#define ANY12MS(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width, const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[128 * 2]); \
SIMD_ALIGNED(uint8_t vout[128 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n, c); \
} \
ptrdiff_t np = n; \
memcpy(vin, src_ptr + (np >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(vin + 128, src_ptr + src_stride + (np >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1, c); \
memcpy(dst_u + (np >> 1), vout, SS(r, 1)); \
memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \
}
#ifdef HAS_ARGBTOUVMATRIXROW_AVX2
ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVMATRIXROW_SSSE3
ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7)
#endif
#ifdef HAS_ARGBTOUV444MATRIXROW_AVX2
ANY12M(ARGBToUV444MatrixRow_Any_AVX2, ARGBToUV444MatrixRow_AVX2, 4, 31)
#endif
#ifdef HAS_ARGBTOUV444MATRIXROW_SSSE3
ANY12M(ARGBToUV444MatrixRow_Any_SSSE3, ARGBToUV444MatrixRow_SSSE3, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif

View File

@ -769,16 +769,16 @@ static __inline uint8_t RGBToUMatrix(uint8_t r,
uint8_t g,
uint8_t b,
const struct ArgbConstants* c) {
return (c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b +
c->kAddUV[0]) >>
return (c->kAddUV[0] -
(c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b)) >>
8;
}
static __inline uint8_t RGBToVMatrix(uint8_t r,
uint8_t g,
uint8_t b,
const struct ArgbConstants* c) {
return (c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b +
c->kAddUV[0]) >>
return (c->kAddUV[0] -
(c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b)) >>
8;
}
@ -1486,11 +1486,14 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif
#define ARGBCONSTANTSBODY(RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
{{BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, \
BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0}, \
{BU, GU, RU, 0, BU, GU, RU, 0, BU, GU, RU, 0, BU, GU, RU, 0}, \
{BV, GV, RV, 0, BV, GV, RV, 0, BV, GV, RV, 0, BV, GV, RV, 0}, \
#define ARGBCONSTANTSBODY(Y0, Y1, Y2, Y3, U0, U1, U2, U3, V0, V1, V2, V3, AY, \
AUV) \
{{Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, \
Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3}, \
{U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, \
U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3}, \
{V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, \
V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3}, \
{AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY}, \
{AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, \
AUV, AUV}}
@ -1505,9 +1508,17 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
ARGBCONSTANTSBODY(RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV); \
ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \
-(RV), 0, AY, AUV); \
const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
ARGBCONSTANTSBODY(BY, GY, RY, BU, GU, RU, BV, GV, RV, AY, AUV);
ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \
-(BV), 0, AY, AUV); \
const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \
ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \
-(GV), -(RV), AY, AUV); \
const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \
ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \
-(GV), -(BV), AY, AUV);
// BT.601 limited range RGB to YUV coefficients
// RY = round(0.299 * 219 / 255 * 256) = 66

View File

@ -9,6 +9,8 @@
*/
#include "libyuv/row.h"
#include "libyuv/convert_from_argb.h" // For ArgbConstants
#ifdef __cplusplus
namespace libyuv {
extern "C" {
@ -1580,26 +1582,16 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
#ifdef HAS_ARGBTOUV444ROW_SSSE3
// Coefficients expressed as negatives to allow 128
struct RgbUVConstants {
vec8 kRGBToU;
vec8 kRGBToV;
};
// Offsets into RgbUVConstants structure
#define KRGBTOU 0
#define KRGBTOV 16
void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
const struct ArgbConstants* c) {
asm volatile(
"pcmpeqb %%xmm5,%%xmm5 \n" // 0x8000
"psllw $15,%%xmm5 \n"
"movdqa 0x0(%4),%%xmm3 \n" // kRGBToU
"movdqa 0x10(%4),%%xmm4 \n" // kRGBToV
"movdqa 0x20(%4),%%xmm3 \n" // kRGBToU
"movdqa 0x40(%4),%%xmm4 \n" // kRGBToV
"sub %1,%2 \n"
LABELALIGN
@ -1654,7 +1646,7 @@ void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
#else
"+rm"(width) // %3
#endif
: "r"(rgbuvconstants) // %4
: "r"(c) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBTOUV444ROW_SSSE3
@ -1665,10 +1657,10 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
const struct ArgbConstants* c) {
asm volatile(
"vbroadcastf128 0x0(%4),%%ymm3 \n" // kRGBToU
"vbroadcastf128 0x10(%4),%%ymm4 \n" // kRGBToV
"vmovdqa 0x20(%4),%%ymm3 \n" // kRGBToU
"vmovdqa 0x40(%4),%%ymm4 \n" // kRGBToV
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0x8000
"vpsllw $15,%%ymm5,%%ymm5 \n"
"vmovdqa %5,%%ymm7 \n"
@ -1724,7 +1716,7 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
#else
"+rm"(width) // %3
#endif
: "r"(rgbuvconstants), // %4
: "r"(c), // %4
"m"(kPermdARGBToY_AVX) // %5
: "memory", "cc", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6",
"ymm7");
@ -1746,10 +1738,10 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
const struct ArgbConstants* c) {
asm volatile(
"movdqa 0x0(%5),%%xmm4 \n" // RGBToU
"movdqa 0x10(%5),%%xmm5 \n" // RGBToV
"movdqa 0x20(%5),%%xmm4 \n" // RGBToU
"movdqa 0x40(%5),%%xmm5 \n" // RGBToV
"pcmpeqb %%xmm6,%%xmm6 \n" // 0x0101
"pabsb %%xmm6,%%xmm6 \n"
"movdqa %6,%%xmm7 \n" // kShuffleAARRGGBB
@ -1803,7 +1795,7 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
"+rm"(width) // %3
#endif
: "r"((intptr_t)(src_stride_argb)), // %4
"r"(rgbuvconstants), // %5
"r"(c), // %5
"m"(kShuffleAARRGGBB) // %6
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
@ -1820,10 +1812,10 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct RgbUVConstants* rgbuvconstants) {
const struct ArgbConstants* c) {
asm volatile(
"vbroadcastf128 0(%5),%%ymm4 \n" // RGBToU
"vbroadcastf128 0x10(%5),%%ymm5 \n" // RGBToV
"vbroadcastf128 0x20(%5),%%ymm4 \n" // RGBToU
"vbroadcastf128 0x40(%5),%%ymm5 \n" // RGBToV
"vpcmpeqb %%ymm6,%%ymm6,%%ymm6 \n" // 0x0101
"vpabsb %%ymm6,%%ymm6 \n"
"vmovdqa %6,%%ymm7 \n" // kShuffleAARRGGBB
@ -1878,15 +1870,13 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
"+rm"(width) // %3
#endif
: "r"((intptr_t)(src_stride_argb)), // %4
"r"(rgbuvconstants), // %5
"r"(c), // %5
"m"(kShuffleAARRGGBB) // %6
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif // HAS_ARGBTOUVROW_AVX2
#if defined(HAS_ARGBTOUV444ROW_SSSE3) || defined(HAS_ARGBTOUVROW_AVX2)
// RGB to BT601 coefficients
// UB 0.875 coefficient = 112
// UG -0.5781 coefficient = -74
@ -1895,30 +1885,13 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
// VG -0.7344 coefficient = -94
// VR 0.875 coefficient = 112
static const struct RgbUVConstants kARGBI601UVConstants = {
{-112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0},
{18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0}};
static const struct RgbUVConstants kABGRI601UVConstants = {
{38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0},
{-112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0}};
static const struct RgbUVConstants kBGRAI601UVConstants = {
{0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112},
{0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18}};
static const struct RgbUVConstants kRGBAI601UVConstants = {
{0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38},
{0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112}};
#endif
#ifdef HAS_ARGBTOUV444ROW_SSSE3
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUV444MatrixRow_SSSE3(src_argb, dst_u, dst_v, width,
&kARGBI601UVConstants);
&kArgbI601Constants);
}
#endif // HAS_ARGBTOUV444ROW_SSSE3
@ -1927,8 +1900,7 @@ void ARGBToUV444Row_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width,
&kARGBI601UVConstants);
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, &kArgbI601Constants);
}
#endif // HAS_ARGBTOUV444ROW_AVX2
@ -1939,7 +1911,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width,
&kARGBI601UVConstants);
&kArgbI601Constants);
}
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
@ -1948,7 +1920,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_SSSE3(src_abgr, src_stride_abgr, dst_u, dst_v, width,
&kABGRI601UVConstants);
&kAbgrI601Constants);
}
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
@ -1957,7 +1929,7 @@ void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_SSSE3(src_bgra, src_stride_bgra, dst_u, dst_v, width,
&kBGRAI601UVConstants);
&kBgraI601Constants);
}
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
@ -1966,7 +1938,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_SSSE3(src_rgba, src_stride_rgba, dst_u, dst_v, width,
&kRGBAI601UVConstants);
&kRgbaI601Constants);
}
#endif // HAS_ARGBTOUVROW_SSSE3
@ -1977,7 +1949,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
&kARGBI601UVConstants);
&kArgbI601Constants);
}
void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
@ -1986,31 +1958,18 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width,
&kABGRI601UVConstants);
&kAbgrI601Constants);
}
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ARGBTOUVJ444ROW_SSSE3
// RGB to JPEG coefficients
// UB 0.500 coefficient = 128
// UG -0.33126 coefficient = -85
// UR -0.16874 coefficient = -43
// VB -0.08131 coefficient = -21
// VG -0.41869 coefficient = -107
// VR 0.500 coefficient = 128
static const struct RgbUVConstants kARGBJPEGUVConstants = {
{-128, 85, 43, 0, -128, 85, 43, 0, -128, 85, 43, 0, -128, 85, 43, 0},
{21, 107, -128, 0, 21, 107, -128, 0, 21, 107, -128, 0, 21, 107, -128, 0}};
void ARGBToUVJ444Row_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUV444MatrixRow_SSSE3(src_argb, dst_u, dst_v, width,
&kARGBJPEGUVConstants);
&kArgbJPEGConstants);
}
#endif // HAS_ARGBTOUVJ444ROW_SSSE3
#ifdef HAS_ARGBTOUVJ444ROW_AVX2
@ -2018,15 +1977,10 @@ void ARGBToUVJ444Row_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width,
&kARGBJPEGUVConstants);
ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants);
}
#endif // HAS_ARGBTOUVJ444ROW_AVX2
static const struct RgbUVConstants kABGRJPEGUVConstants = {
{43, 85, -128, 0, 43, 85, -128, 0, 43, 85, -128, 0, 43, 85, -128, 0},
{-128, 107, 21, 0, -128, 107, 21, 0, -128, 107, 21, 0, -128, 107, 21, 0}};
#ifdef HAS_ARGBTOUVJROW_SSSE3
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
@ -2034,7 +1988,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width,
&kARGBJPEGUVConstants);
&kArgbJPEGConstants);
}
#endif // HAS_ARGBTOUVJROW_SSSE3
@ -2045,7 +1999,7 @@ void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_SSSE3(src_abgr, src_stride_abgr, dst_u, dst_v, width,
&kABGRJPEGUVConstants);
&kAbgrJPEGConstants);
}
#endif // HAS_ABGRTOUVJROW_SSSE3
@ -2056,7 +2010,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
&kARGBJPEGUVConstants);
&kArgbJPEGConstants);
}
#endif // HAS_ARGBTOUVJROW_AVX2
@ -2067,7 +2021,7 @@ void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width,
&kABGRJPEGUVConstants);
&kAbgrJPEGConstants);
}
#endif // HAS_ABGRTOUVJROW_AVX2