mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
I420ToARGB conversion with matrix.
Take color conversion constants as a parameter to the row function I422ToARGBMatrixRow_SSSE3. This allows future color space variations to share a single low-level row function. R=harryjin@google.com BUG=libyuv:488 Review URL: https://webrtc-codereview.appspot.com/56669004 .
This commit is contained in:
parent
0bc626a5d7
commit
925c3d9e26
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1475
|
||||
Version: 1476
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -88,6 +88,7 @@ extern "C" {
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOARGBMATRIXROW_SSSE3
|
||||
#define HAS_I422TOBGRAROW_SSSE3
|
||||
#define HAS_I422TORAWROW_SSSE3
|
||||
#define HAS_I422TORGB24ROW_SSSE3
|
||||
@ -161,6 +162,7 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
|
||||
(!defined(__clang__) || defined(__SSSE3__))
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOARGBMATRIXROW_SSSE3
|
||||
#endif
|
||||
|
||||
// GCC >= 4.7.0 required for AVX2.
|
||||
@ -223,6 +225,7 @@ extern "C" {
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#define HAS_I422TOABGRROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
#define HAS_I422TOARGBMATRIXROW_AVX2
|
||||
#define HAS_I422TOBGRAROW_AVX2
|
||||
#define HAS_I422TORAWROW_AVX2
|
||||
#define HAS_I422TORGB24ROW_AVX2
|
||||
@ -290,6 +293,8 @@ extern "C" {
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
// TODO(fbarchard): Implement NEON version
|
||||
#define HAS_I422TOARGBMATRIXROW_NEON
|
||||
#define HAS_I422TOBGRAROW_NEON
|
||||
#define HAS_I422TORAWROW_NEON
|
||||
#define HAS_I422TORGB24ROW_NEON
|
||||
@ -414,6 +419,21 @@ typedef uint32 ulvec32[8];
|
||||
typedef uint8 ulvec8[32];
|
||||
#endif
|
||||
|
||||
// Constants for x86 (Intel) YUV to RGB row conversion, grouped in one struct
// so a row function can receive the color matrix as a parameter instead of
// naming a specific global table.
// Field order and sizes are a layout contract with the assembly: the per-file
// copies of this struct that this definition replaces annotated the fields
// with byte offsets 0, 32, 64, 96, 128, 160 and 192, and the Visual C code
// addresses fields through fixed offset macros (KUVTOB 0, KUVTOG 32,
// KUVTOR 64, ...). Do not reorder or resize fields without updating those.
// Only defined for x86 targets; the row prototypes elsewhere refer to it by
// pointer only.
|
||||
#if defined(_M_IX86) || defined(_M_X64) || \
|
||||
defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
struct YuvConstants {
|
||||
lvec8 kUVToB;  // offset 0: byte coefficients multiplied with UV for blue
|
||||
lvec8 kUVToG;  // offset 32: byte coefficients multiplied with UV for green
|
||||
lvec8 kUVToR;  // offset 64: byte coefficients multiplied with UV for red
|
||||
lvec16 kUVBiasB;  // offset 96: 16-bit bias; the blue UV product is subtracted from it
|
||||
lvec16 kUVBiasG;  // offset 128: 16-bit bias; the green UV product is subtracted from it
|
||||
lvec16 kUVBiasR;  // offset 160: 16-bit bias; the red UV product is subtracted from it
|
||||
lvec16 kYToRgb;  // offset 192: 16-bit scale applied to Y with a high multiply
|
||||
};
|
||||
#endif  // x86 only: struct YuvConstants
|
||||
|
||||
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
|
||||
#define OMITFP
|
||||
#else
|
||||
@ -509,6 +529,12 @@ void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -962,6 +988,12 @@ void I422ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I411ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1039,6 +1071,12 @@ void I422ToARGBRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I422ToBGRARow_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1069,6 +1107,12 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I411ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1203,6 +1247,12 @@ void I422ToARGBRow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I422ToBGRARow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1233,6 +1283,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -1463,7 +1519,13 @@ void I422ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width);
|
||||
void I422ToARGBMatrixRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1475
|
||||
#define LIBYUV_VERSION 1476
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -2156,6 +2156,51 @@ void I422ToUYVYRow_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(HAS_I422TOARGBMATRIXROW_SSSE3) || \
    defined(HAS_I422TOARGBMATRIXROW_AVX2)
// Conversion constant tables; the definitions live in the platform row files.
extern struct YuvConstants kYuvConstants;   // BT601 constants for YUV to RGB.
extern struct YuvConstants kYuvJConstants;  // JPEG constants for YUV to RGB.
#endif

// The fixed-colorspace row functions below are thin wrappers: each forwards
// its arguments to the Matrix row function together with the address of the
// constant table for its color space. Each SIMD level has its own guard so
// that one set of wrappers does not depend on the other level's macro.

#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
// JPeg color space version of I422ToARGB
void J422ToARGBRow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* dst_argb,
                         int width) {
  I422ToARGBMatrixRow_SSSE3(y_buf, u_buf, v_buf, dst_argb,
                            &kYuvJConstants, width);
}

// BT601 color space version of I422ToARGB
void I422ToARGBRow_SSSE3(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* dst_argb,
                         int width) {
  I422ToARGBMatrixRow_SSSE3(y_buf, u_buf, v_buf, dst_argb,
                            &kYuvConstants, width);
}
#endif  // HAS_I422TOARGBMATRIXROW_SSSE3

#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
// JPeg color space version of I422ToARGB
void J422ToARGBRow_AVX2(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_argb,
                        int width) {
  I422ToARGBMatrixRow_AVX2(y_buf, u_buf, v_buf, dst_argb,
                           &kYuvJConstants, width);
}

// BT601 color space version of I422ToARGB
void I422ToARGBRow_AVX2(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* dst_argb,
                        int width) {
  I422ToARGBMatrixRow_AVX2(y_buf, u_buf, v_buf, dst_argb,
                           &kYuvConstants, width);
}
#endif  // HAS_I422TOARGBMATRIXROW_AVX2
|
||||
|
||||
// Maximum temporary width for wrappers to process at a time, in pixels.
|
||||
#define MAXTWIDTH 2048
|
||||
|
||||
|
||||
@ -1319,16 +1319,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
|
||||
|
||||
struct YuvConstants {
|
||||
lvec8 kUVToB; // 0
|
||||
lvec8 kUVToG; // 32
|
||||
lvec8 kUVToR; // 64
|
||||
lvec16 kUVBiasB; // 96
|
||||
lvec16 kUVBiasG; // 128
|
||||
lvec16 kUVBiasR; // 160
|
||||
lvec16 kYToRgb; // 192
|
||||
};
|
||||
|
||||
// BT.601 YUV to RGB reference
|
||||
// R = (Y - 16) * 1.164 - V * -1.596
|
||||
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
|
||||
@ -1351,7 +1341,7 @@ struct YuvConstants {
|
||||
#define BR (VR * 128 + YGB)
|
||||
|
||||
// BT601 constants for YUV to RGB.
|
||||
static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||
YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
|
||||
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
|
||||
@ -1365,7 +1355,7 @@ static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||
};
|
||||
|
||||
// BT601 constants for NV21 where chroma plane is VU instead of UV.
|
||||
static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
||||
YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
||||
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
|
||||
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
|
||||
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
|
||||
@ -1658,11 +1648,12 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -1678,33 +1669,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP J422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
|
||||
: [kYuvConstants]"r"(YuvConstants) // %[YuvConstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1939,14 +1904,15 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_I422TOBGRAROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -1974,54 +1940,12 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: [kYuvConstants]"r"(YuvConstants) // %[YuvConstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I422TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_J422TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
|
||||
// Step 3: Weave into ARGB
|
||||
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA
|
||||
"vpermq $0xd8,%%ymm2,%%ymm2 \n"
|
||||
"vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels
|
||||
"vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels
|
||||
|
||||
"vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n"
|
||||
"vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n"
|
||||
"lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
|
||||
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_J422TOARGBROW_AVX2
|
||||
#endif // HAS_I422TOARGBMATRIXROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
// 16 pixels
|
||||
|
||||
@ -25,16 +25,6 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
|
||||
|
||||
struct YuvConstants {
|
||||
lvec8 kUVToB;
|
||||
lvec8 kUVToG;
|
||||
lvec8 kUVToR;
|
||||
lvec16 kUVBiasB;
|
||||
lvec16 kUVBiasG;
|
||||
lvec16 kUVBiasR;
|
||||
lvec16 kYToRgb;
|
||||
};
|
||||
|
||||
#define KUVTOB 0
|
||||
#define KUVTOG 32
|
||||
#define KUVTOR 64
|
||||
@ -65,7 +55,7 @@ struct YuvConstants {
|
||||
#define BR (VR * 128 + YGB)
|
||||
|
||||
// BT601 constants for YUV to RGB.
|
||||
static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||
YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
|
||||
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
|
||||
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
|
||||
@ -79,7 +69,7 @@ static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
|
||||
};
|
||||
|
||||
// BT601 constants for NV21 where chroma plane is VU instead of UV.
|
||||
static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
||||
YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
||||
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
|
||||
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
|
||||
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
|
||||
@ -124,7 +114,7 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
|
||||
#define BRJ (VRJ * 128 + YGBJ)
|
||||
|
||||
// JPEG constants for YUV to RGB.
|
||||
static YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
||||
YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
||||
{ UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0,
|
||||
UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 },
|
||||
{ UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
|
||||
@ -155,12 +145,13 @@ static YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
||||
|
||||
// 64 bit
|
||||
#if defined(_M_X64)
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
|
||||
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
||||
@ -172,15 +163,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
|
||||
xmm1 = _mm_loadu_si128(&xmm0);
|
||||
xmm2 = _mm_loadu_si128(&xmm0);
|
||||
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kYuvConstants.kUVToB);
|
||||
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kYuvConstants.kUVToG);
|
||||
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kYuvConstants.kUVToR);
|
||||
xmm0 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasB, xmm0);
|
||||
xmm1 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasG, xmm1);
|
||||
xmm2 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasR, xmm2);
|
||||
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB);
|
||||
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG);
|
||||
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR);
|
||||
xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0);
|
||||
xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1);
|
||||
xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2);
|
||||
xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
|
||||
xmm3 = _mm_unpacklo_epi8(xmm3, xmm3);
|
||||
xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)kYuvConstants.kYToRgb);
|
||||
xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb);
|
||||
xmm0 = _mm_adds_epi16(xmm0, xmm3);
|
||||
xmm1 = _mm_adds_epi16(xmm1, xmm3);
|
||||
xmm2 = _mm_adds_epi16(xmm2, xmm3);
|
||||
@ -2012,77 +2003,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
__asm lea edx, [edx + 64] \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
#ifdef HAS_I422TOARGBMATRIXROW_AVX2
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
void I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // argb
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 20] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(ebp)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I422TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_J422TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
void J422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // argb
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvJConstants)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_J422TOARGBROW_AVX2
|
||||
#endif // HAS_I422TOARGBMATRIXROW_AVX2
|
||||
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
@ -2691,11 +2650,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
|
||||
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2704,8 +2664,9 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ecx, [esp + 12 + 20] // width
|
||||
lea ebp, kYuvConstants
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
@ -2724,40 +2685,6 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// JPeg color space version of I422ToARGB
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
void J422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // argb
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(kYuvJConstants)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
// Similar to I420 but duplicate UV once more.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user