diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index 284462d60..c0473fd70 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -23,7 +23,7 @@ extern "C" { // (bgra in memory) you'll want to use the kArgb* constants. Alternatively, if // your input is ABGR little endian (rgba in memory) you'd use the kAbgr* ones. // -// Conversion matrix for RGB to YUV. +// Conversion matrix for xRGB to YUV. LIBYUV_API extern const struct ArgbConstants kArgbI601Constants; // BT.601 LIBYUV_API extern const struct ArgbConstants kArgbJPEGConstants; // BT.601 full LIBYUV_API extern const struct ArgbConstants kArgbH709Constants; // BT.709 @@ -32,7 +32,7 @@ LIBYUV_API extern const struct ArgbConstants kArgbU2020Constants; // BT.2020 LIBYUV_API extern const struct ArgbConstants kArgbV2020Constants; // BT.2020 full -// Conversion matrix for BGR to YUV. +// Conversion matrix for xBGR to YUV. LIBYUV_API extern const struct ArgbConstants kAbgrI601Constants; // BT.601 LIBYUV_API extern const struct ArgbConstants kAbgrJPEGConstants; // BT.601 full LIBYUV_API extern const struct ArgbConstants kAbgrH709Constants; // BT.709 @@ -41,6 +41,24 @@ LIBYUV_API extern const struct ArgbConstants kAbgrU2020Constants; // BT.2020 LIBYUV_API extern const struct ArgbConstants kAbgrV2020Constants; // BT.2020 full +// Conversion matrix for RGBx to YUV. +LIBYUV_API extern const struct ArgbConstants kRgbaI601Constants; // BT.601 +LIBYUV_API extern const struct ArgbConstants kRgbaJPEGConstants; // BT.601 full +LIBYUV_API extern const struct ArgbConstants kRgbaH709Constants; // BT.709 +LIBYUV_API extern const struct ArgbConstants kRgbaF709Constants; // BT.709 full +LIBYUV_API extern const struct ArgbConstants kRgbaU2020Constants; // BT.2020 +LIBYUV_API extern const struct ArgbConstants + kRgbaV2020Constants; // BT.2020 full + +// Conversion matrix for BGRx to YUV. 
+LIBYUV_API extern const struct ArgbConstants kBgraI601Constants; // BT.601 +LIBYUV_API extern const struct ArgbConstants kBgraJPEGConstants; // BT.601 full +LIBYUV_API extern const struct ArgbConstants kBgraH709Constants; // BT.709 +LIBYUV_API extern const struct ArgbConstants kBgraF709Constants; // BT.709 full +LIBYUV_API extern const struct ArgbConstants kBgraU2020Constants; // BT.2020 +LIBYUV_API extern const struct ArgbConstants + kBgraV2020Constants; // BT.2020 full + // Copy ARGB to ARGB. #define ARGBToARGB ARGBCopy LIBYUV_API diff --git a/include/libyuv/row.h b/include/libyuv/row.h index f2e352328..034ff866e 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -273,6 +273,8 @@ extern "C" { #define HAS_ARGBTOUVROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 #define HAS_RGBATOUVROW_SSSE3 +#define HAS_ARGBTOUVMATRIXROW_SSSE3 +#define HAS_ARGBTOUV444MATRIXROW_SSSE3 #if defined(__x86_64__) || !defined(__pic__) // TODO(fbarchard): fix build error on android_full_debug=1 @@ -305,6 +307,8 @@ extern "C" { #define HAS_ARGBTOUVJ444ROW_AVX2 #define HAS_ARGBTOUVJROW_AVX2 #define HAS_ARGBTOUVROW_AVX2 +#define HAS_ARGBTOUVMATRIXROW_AVX2 +#define HAS_ARGBTOUV444MATRIXROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 @@ -973,8 +977,8 @@ typedef uint8_t ulvec8[32]; struct ArgbConstants { uint8_t kRGBToY[32]; - int16_t kRGBToU[16]; - int16_t kRGBToV[16]; + int8_t kRGBToU[32]; + int8_t kRGBToV[32]; uint16_t kAddY[16]; uint16_t kAddUV[16]; }; @@ -2092,11 +2096,60 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_v, int width, const struct ArgbConstants* c); +void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); void ARGBToUV444MatrixRow_C(const 
uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c); +void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUVMatrixRow_Any_SSSE3(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUVMatrixRow_Any_AVX2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUV444MatrixRow_Any_SSSE3(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUV444MatrixRow_Any_AVX2(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); +void ARGBToUV444MatrixRow_Any_AVX2(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct ArgbConstants* c); void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); diff --git a/source/convert.cc b/source/convert.cc index f8aa29c69..e01442316 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -2163,6 +2163,22 @@ int ARGBToI420Matrix(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = ARGBToUVMatrixRow_C; +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_AVX2; + } + } +#endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) { return -1; diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 3e70b08ae..9c956c579 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -184,6 +184,22 @@ int ARGBToI444Matrix(const uint8_t* src_argb, uint8_t* dst_v, int width, const struct ArgbConstants* c) = ARGBToUV444MatrixRow_C; +#if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUV444MATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX2; + } + } +#endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) { return -1; @@ -384,6 +400,22 @@ int ARGBToI422Matrix(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = ARGBToUVMatrixRow_C; +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || height == 0) { return -1; @@ -630,6 +662,22 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = ARGBToUVMatrixRow_C; +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + 
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 || diff --git a/source/row_any.cc b/source/row_any.cc index cd0d3e76d..f44bcfb5c 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -2198,6 +2198,62 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15) memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \ } +#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ + int width, const struct ArgbConstants* c) { \ + SIMD_ALIGNED(uint8_t vin[128]); \ + SIMD_ALIGNED(uint8_t vout[128 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \ + } \ + memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \ + ANY_SIMD(vin, vout, vout + 128, MASK + 1, c); \ + memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \ + memcpy(dst_v + (ptrdiff_t)n, vout + 128, (ptrdiff_t)r); \ + } + +#define ANY12MS(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \ + uint8_t* dst_v, int width, const struct ArgbConstants* c) { \ + SIMD_ALIGNED(uint8_t vin[128 * 2]); \ + SIMD_ALIGNED(uint8_t vout[128 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n, c); \ + } \ + ptrdiff_t np = n; \ + memcpy(vin, src_ptr + (np >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ 
+ memcpy(vin + 128, src_ptr + src_stride + (np >> UVSHIFT) * BPP, \ + SS(r, UVSHIFT) * BPP); \ + if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ + memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \ + BPP); \ + memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \ + vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + } \ + ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1, c); \ + memcpy(dst_u + (np >> 1), vout, SS(r, 1)); \ + memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \ + } + +#ifdef HAS_ARGBTOUVMATRIXROW_AVX2 +ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 15) +#endif +#ifdef HAS_ARGBTOUVMATRIXROW_SSSE3 +ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7) +#endif +#ifdef HAS_ARGBTOUV444MATRIXROW_AVX2 +ANY12M(ARGBToUV444MatrixRow_Any_AVX2, ARGBToUV444MatrixRow_AVX2, 4, 31) +#endif +#ifdef HAS_ARGBTOUV444MATRIXROW_SSSE3 +ANY12M(ARGBToUV444MatrixRow_Any_SSSE3, ARGBToUV444MatrixRow_SSSE3, 4, 15) +#endif + #ifdef HAS_ARGBTOUVROW_AVX2 ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) #endif diff --git a/source/row_common.cc b/source/row_common.cc index 3a68c00e6..a9969d808 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -769,16 +769,16 @@ static __inline uint8_t RGBToUMatrix(uint8_t r, uint8_t g, uint8_t b, const struct ArgbConstants* c) { - return (c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b + - c->kAddUV[0]) >> + return (c->kAddUV[0] - + (c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b)) >> 8; } static __inline uint8_t RGBToVMatrix(uint8_t r, uint8_t g, uint8_t b, const struct ArgbConstants* c) { - return (c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b + - c->kAddUV[0]) >> + return (c->kAddUV[0] - + (c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b)) >> 8; } @@ -1486,13 +1486,16 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}} #endif 
-#define ARGBCONSTANTSBODY(RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \ - {{BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, \ - BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0, BY, GY, RY, 0}, \ - {BU, GU, RU, 0, BU, GU, RU, 0, BU, GU, RU, 0, BU, GU, RU, 0}, \ - {BV, GV, RV, 0, BV, GV, RV, 0, BV, GV, RV, 0, BV, GV, RV, 0}, \ - {AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY}, \ - {AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, \ +#define ARGBCONSTANTSBODY(Y0, Y1, Y2, Y3, U0, U1, U2, U3, V0, V1, V2, V3, AY, \ + AUV) \ + {{Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, \ + Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3, Y0, Y1, Y2, Y3}, \ + {U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, \ + U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3, U0, U1, U2, U3}, \ + {V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, \ + V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3, V0, V1, V2, V3}, \ + {AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY, AY}, \ + {AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, AUV, \ AUV, AUV}} // clang-format on @@ -1503,11 +1506,19 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \ YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB); -#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \ - const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \ - ARGBCONSTANTSBODY(RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV); \ - const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \ - ARGBCONSTANTSBODY(BY, GY, RY, BU, GU, RU, BV, GV, RV, AY, AUV); +#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \ + const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \ + ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \ + -(RV), 0, AY, AUV); \ + const struct 
ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \ + ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \ + -(BV), 0, AY, AUV); \ + const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \ + ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \ + -(GV), -(RV), AY, AUV); \ + const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \ + ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \ + -(GV), -(BV), AY, AUV); // BT.601 limited range RGB to YUV coefficients // RY = round(0.299 * 219 / 255 * 256) = 66 diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 9af3fc408..48998d323 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -9,6 +9,8 @@ */ #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" // For kArgb*/kAbgr*/kRgba*/kBgra* constant tables + #ifdef __cplusplus namespace libyuv { extern "C" { @@ -1580,26 +1582,16 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) { #ifdef HAS_ARGBTOUV444ROW_SSSE3 -// Coefficients expressed as negatives to allow 128 -struct RgbUVConstants { - vec8 kRGBToU; - vec8 kRGBToV; -}; - -// Offsets into RgbUVConstants structure -#define KRGBTOU 0 -#define KRGBTOV 16 - void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, - const struct RgbUVConstants* rgbuvconstants) { + const struct ArgbConstants* c) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // 0x8000 "psllw $15,%%xmm5 \n" - "movdqa 0x0(%4),%%xmm3 \n" // kRGBToU - "movdqa 0x10(%4),%%xmm4 \n" // kRGBToV + "movdqa 0x20(%4),%%xmm3 \n" // kRGBToU + "movdqa 0x40(%4),%%xmm4 \n" // kRGBToV "sub %1,%2 \n" LABELALIGN @@ -1654,7 +1646,7 @@ void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb, #else "+rm"(width) // %3 #endif - : "r"(rgbuvconstants) // %4 + : "r"(c) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOUV444ROW_SSSE3 @@ -1665,10 +1657,10 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb, 
uint8_t* dst_u, uint8_t* dst_v, int width, - const struct RgbUVConstants* rgbuvconstants) { + const struct ArgbConstants* c) { asm volatile( - "vbroadcastf128 0x0(%4),%%ymm3 \n" // kRGBToU - "vbroadcastf128 0x10(%4),%%ymm4 \n" // kRGBToV + "vmovdqa 0x20(%4),%%ymm3 \n" // kRGBToU + "vmovdqa 0x40(%4),%%ymm4 \n" // kRGBToV "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0x8000 "vpsllw $15,%%ymm5,%%ymm5 \n" "vmovdqa %5,%%ymm7 \n" @@ -1724,7 +1716,7 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb, #else "+rm"(width) // %3 #endif - : "r"(rgbuvconstants), // %4 + : "r"(c), // %4 "m"(kPermdARGBToY_AVX) // %5 : "memory", "cc", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7"); @@ -1746,10 +1738,10 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, - const struct RgbUVConstants* rgbuvconstants) { + const struct ArgbConstants* c) { asm volatile( - "movdqa 0x0(%5),%%xmm4 \n" // RGBToU - "movdqa 0x10(%5),%%xmm5 \n" // RGBToV + "movdqa 0x20(%5),%%xmm4 \n" // RGBToU + "movdqa 0x40(%5),%%xmm5 \n" // RGBToV "pcmpeqb %%xmm6,%%xmm6 \n" // 0x0101 "pabsb %%xmm6,%%xmm6 \n" "movdqa %6,%%xmm7 \n" // kShuffleAARRGGBB @@ -1803,7 +1795,7 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb, "+rm"(width) // %3 #endif : "r"((intptr_t)(src_stride_argb)), // %4 - "r"(rgbuvconstants), // %5 + "r"(c), // %5 "m"(kShuffleAARRGGBB) // %6 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); @@ -1820,10 +1812,10 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, - const struct RgbUVConstants* rgbuvconstants) { + const struct ArgbConstants* c) { asm volatile( - "vbroadcastf128 0(%5),%%ymm4 \n" // RGBToU - "vbroadcastf128 0x10(%5),%%ymm5 \n" // RGBToV + "vbroadcastf128 0x20(%5),%%ymm4 \n" // RGBToU + "vbroadcastf128 0x40(%5),%%ymm5 \n" // RGBToV "vpcmpeqb %%ymm6,%%ymm6,%%ymm6 \n" // 0x0101 "vpabsb %%ymm6,%%ymm6 \n" "vmovdqa %6,%%ymm7 \n" // kShuffleAARRGGBB @@ 
-1878,15 +1870,13 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, "+rm"(width) // %3 #endif : "r"((intptr_t)(src_stride_argb)), // %4 - "r"(rgbuvconstants), // %5 + "r"(c), // %5 "m"(kShuffleAARRGGBB) // %6 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVROW_AVX2 -#if defined(HAS_ARGBTOUV444ROW_SSSE3) || defined(HAS_ARGBTOUVROW_AVX2) - // RGB to BT601 coefficients // UB 0.875 coefficient = 112 // UG -0.5781 coefficient = -74 @@ -1895,30 +1885,13 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, // VG -0.7344 coefficient = -94 // VR 0.875 coefficient = 112 -static const struct RgbUVConstants kARGBI601UVConstants = { - {-112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0}, - {18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0}}; - -static const struct RgbUVConstants kABGRI601UVConstants = { - {38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0}, - {-112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0}}; - -static const struct RgbUVConstants kBGRAI601UVConstants = { - {0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112}, - {0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18}}; - -static const struct RgbUVConstants kRGBAI601UVConstants = { - {0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38}, - {0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112}}; -#endif - #ifdef HAS_ARGBTOUV444ROW_SSSE3 void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_SSSE3(src_argb, dst_u, dst_v, width, - &kARGBI601UVConstants); + &kArgbI601Constants); } #endif // HAS_ARGBTOUV444ROW_SSSE3 @@ -1927,8 +1900,7 @@ void ARGBToUV444Row_AVX2(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, - &kARGBI601UVConstants); + ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, 
dst_v, width, &kArgbI601Constants); } #endif // HAS_ARGBTOUV444ROW_AVX2 @@ -1939,7 +1911,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width, - &kARGBI601UVConstants); + &kArgbI601Constants); } void ABGRToUVRow_SSSE3(const uint8_t* src_abgr, @@ -1948,7 +1920,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_SSSE3(src_abgr, src_stride_abgr, dst_u, dst_v, width, - &kABGRI601UVConstants); + &kAbgrI601Constants); } void BGRAToUVRow_SSSE3(const uint8_t* src_bgra, @@ -1957,7 +1929,7 @@ void BGRAToUVRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_SSSE3(src_bgra, src_stride_bgra, dst_u, dst_v, width, - &kBGRAI601UVConstants); + &kBgraI601Constants); } void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, @@ -1966,7 +1938,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_SSSE3(src_rgba, src_stride_rgba, dst_u, dst_v, width, - &kRGBAI601UVConstants); + &kRgbaI601Constants); } #endif // HAS_ARGBTOUVROW_SSSE3 @@ -1977,7 +1949,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width, - &kARGBI601UVConstants); + &kArgbI601Constants); } void ABGRToUVRow_AVX2(const uint8_t* src_abgr, @@ -1986,31 +1958,18 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width, - &kABGRI601UVConstants); + &kAbgrI601Constants); } #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJ444ROW_SSSE3 -// RGB to JPEG coefficients -// UB 0.500 coefficient = 128 -// UG -0.33126 coefficient = -85 -// UR -0.16874 coefficient = -43 -// VB -0.08131 coefficient = -21 -// VG -0.41869 coefficient = -107 -// VR 0.500 coefficient = 128 - -static const struct RgbUVConstants kARGBJPEGUVConstants = { - {-128, 85, 
43, 0, -128, 85, 43, 0, -128, 85, 43, 0, -128, 85, 43, 0}, - {21, 107, -128, 0, 21, 107, -128, 0, 21, 107, -128, 0, 21, 107, -128, 0}}; - void ARGBToUVJ444Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_SSSE3(src_argb, dst_u, dst_v, width, - &kARGBJPEGUVConstants); + &kArgbJPEGConstants); } - #endif // HAS_ARGBTOUVJ444ROW_SSSE3 #ifdef HAS_ARGBTOUVJ444ROW_AVX2 @@ -2018,15 +1977,10 @@ void ARGBToUVJ444Row_AVX2(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, - &kARGBJPEGUVConstants); + ARGBToUV444MatrixRow_AVX2(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants); } #endif // HAS_ARGBTOUVJ444ROW_AVX2 -static const struct RgbUVConstants kABGRJPEGUVConstants = { - {43, 85, -128, 0, 43, 85, -128, 0, 43, 85, -128, 0, 43, 85, -128, 0}, - {-128, 107, 21, 0, -128, 107, 21, 0, -128, 107, 21, 0, -128, 107, 21, 0}}; - #ifdef HAS_ARGBTOUVJROW_SSSE3 void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, int src_stride_argb, @@ -2034,7 +1988,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width, - &kARGBJPEGUVConstants); + &kArgbJPEGConstants); } #endif // HAS_ARGBTOUVJROW_SSSE3 @@ -2045,7 +1999,7 @@ void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_SSSE3(src_abgr, src_stride_abgr, dst_u, dst_v, width, - &kABGRJPEGUVConstants); + &kAbgrJPEGConstants); } #endif // HAS_ABGRTOUVJROW_SSSE3 @@ -2056,7 +2010,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width, - &kARGBJPEGUVConstants); + &kArgbJPEGConstants); } #endif // HAS_ARGBTOUVJROW_AVX2 @@ -2067,7 +2021,7 @@ void ABGRToUVJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_v, int width) { ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width, - 
&kABGRJPEGUVConstants); + &kAbgrJPEGConstants); } #endif // HAS_ABGRTOUVJROW_AVX2