mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBToUV use AVX2 for 64 bit x86
Skylake: Was ARGBToJ420_Opt (312 ms), Now ARGBToJ420_Opt (242 ms).
Icelake: Was ARGBToJ420_Opt (302 ms), Now ARGBToJ420_Opt (220 ms).
AMD Zen3 on Windows: Was ARGBToJ420_Opt (305 ms), Now ARGBToJ420_Opt (216 ms).
32 bit x86 uses SSE: Now ARGBToJ420_Opt (326 ms).
MCA analysis of new AVX, SSE and old AVX: https://godbolt.org/z/37bdazWYr
Bug: None
Change-Id: I72f5504407751e164c3558aebe836dd15223d65f
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6957477
Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
parent
b33794a586
commit
142db12947
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||||
Version: 1918
|
Version: 1919
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
Shipped: yes
|
Shipped: yes
|
||||||
|
|||||||
@ -346,11 +346,6 @@ extern "C" {
|
|||||||
#define HAS_SPLITXRGBROW_AVX2
|
#define HAS_SPLITXRGBROW_AVX2
|
||||||
#define HAS_SWAPUVROW_AVX2
|
#define HAS_SWAPUVROW_AVX2
|
||||||
#define HAS_YUY2TONVUVROW_AVX2
|
#define HAS_YUY2TONVUVROW_AVX2
|
||||||
// TODO: Port SSSE3 to AVX2
|
|
||||||
// #define HAS_ABGRTOUVJROW_AVX2
|
|
||||||
// #define HAS_ABGRTOUVROW_AVX2
|
|
||||||
// #define HAS_ARGBTOUVJROW_AVX2
|
|
||||||
#define HAS_ARGBTOUVROW_AVX2
|
|
||||||
|
|
||||||
#if defined(__x86_64__) || !defined(__pic__)
|
#if defined(__x86_64__) || !defined(__pic__)
|
||||||
// TODO(fbarchard): fix build error on android_full_debug=1
|
// TODO(fbarchard): fix build error on android_full_debug=1
|
||||||
@ -360,6 +355,18 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// The following are available for AVX2 gcc/clang x64 platforms:
|
||||||
|
// TODO(fbarchard): Port to Visual C
|
||||||
|
// TODO(fbarchard): Port to x86 32 bit
|
||||||
|
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \
|
||||||
|
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \
|
||||||
|
!defined(LIBYUV_ENABLE_ROWWIN)
|
||||||
|
#define HAS_ABGRTOUVJROW_AVX2
|
||||||
|
#define HAS_ABGRTOUVROW_AVX2
|
||||||
|
#define HAS_ARGBTOUVJROW_AVX2
|
||||||
|
#define HAS_ARGBTOUVROW_AVX2
|
||||||
|
#endif
|
||||||
|
|
||||||
// The following are available for AVX512 clang x86 platforms:
|
// The following are available for AVX512 clang x86 platforms:
|
||||||
// TODO(fbarchard): Port to GCC and Visual C
|
// TODO(fbarchard): Port to GCC and Visual C
|
||||||
// TODO(b/42280744): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI.
|
// TODO(b/42280744): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI.
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1918
|
#define LIBYUV_VERSION 1919
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -1747,9 +1747,8 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
|||||||
"movdqa %0,%%xmm6 \n" // ARGB to U
|
"movdqa %0,%%xmm6 \n" // ARGB to U
|
||||||
"movdqa %1,%%xmm7 \n" // ARGB to V
|
"movdqa %1,%%xmm7 \n" // ARGB to V
|
||||||
:
|
:
|
||||||
:
|
: "m"(rgbuvconstants->kRGBToU), // %0
|
||||||
"m"(rgbuvconstants->kRGBToU), // %0
|
"m"(rgbuvconstants->kRGBToV) // %1
|
||||||
"m"(rgbuvconstants->kRGBToV) // %1
|
|
||||||
: "memory", "cc");
|
: "memory", "cc");
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
@ -1824,9 +1823,12 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||||
|
|
||||||
// UYVY shuf 8 UV to 16 UV.
|
// ARGBARGB to AARRGGBB shuffle
|
||||||
static const vec8 kShuffleAARRGGBB = {0, 4, 1, 5, 2, 6, 3, 7,
|
static const lvec8 kShuffleAARRGGBB = {
|
||||||
8, 12, 9, 13, 10, 14, 11, 15};
|
0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15,
|
||||||
|
0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// 16x2 -> 8x1 ARGB pixels converted to 8 U and 8 V
|
// 16x2 -> 8x1 ARGB pixels converted to 8 U and 8 V
|
||||||
// ARGBToUV does rounding average of 4 ARGB pixels
|
// ARGBToUV does rounding average of 4 ARGB pixels
|
||||||
@ -1837,19 +1839,16 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
const struct RgbUVConstants* rgbuvconstants) {
|
const struct RgbUVConstants* rgbuvconstants) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vbroadcastf128 %7,%%ymm15 \n" // kShuffleAARRGGBB
|
"vmovdqa %7,%%ymm15 \n" // kShuffleAARRGGBB
|
||||||
"vpcmpeqb %%ymm14,%%ymm14,%%ymm14 \n" // 0x0101
|
"vpcmpeqb %%ymm14,%%ymm14,%%ymm14 \n" // 0x0101
|
||||||
"vpabsb %%ymm14,%%ymm14 \n"
|
"vpabsb %%ymm14,%%ymm14 \n"
|
||||||
"vpxor %%ymm13,%%ymm13,%%ymm13 \n" // 0 for vpavgw
|
"vpxor %%ymm13,%%ymm13,%%ymm13 \n" // 0 for vpavgw
|
||||||
|
|
||||||
"vbroadcastf128 %5,%%ymm6 \n" // RGBToU
|
"vbroadcastf128 %5,%%ymm6 \n" // RGBToU
|
||||||
"vbroadcastf128 %6,%%ymm7 \n" // RGBToV
|
"vbroadcastf128 %6,%%ymm7 \n" // RGBToV
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0x8000
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0x8000
|
||||||
"vpsllw $15,%%ymm5,%%ymm5 \n"
|
"vpsllw $15,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
"sub %1,%2 \n"
|
"sub %1,%2 \n"
|
||||||
|
|
||||||
LABELALIGN
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu (%0),%%ymm8 \n" // Read 16x2 ARGB Pixels
|
"vmovdqu (%0),%%ymm8 \n" // Read 16x2 ARGB Pixels
|
||||||
"vmovdqu 0x20(%0),%%ymm9 \n"
|
"vmovdqu 0x20(%0),%%ymm9 \n"
|
||||||
@ -1886,7 +1885,7 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
|||||||
"lea 0x40(%0),%0 \n"
|
"lea 0x40(%0),%0 \n"
|
||||||
"lea 0x8(%1),%1 \n"
|
"lea 0x8(%1),%1 \n"
|
||||||
"subl $0x10,%3 \n"
|
"subl $0x10,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
@ -1899,10 +1898,85 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
|||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7",
|
||||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm13", "xmm14", "xmm15");
|
"xmm8", "xmm9", "xmm10", "xmm11", "xmm13", "xmm14", "xmm15");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 16x2 -> 8x1 ARGB pixels converted to 8 U and 8 V
|
||||||
|
// ARGBToUV does rounding average of 4 ARGB pixels
|
||||||
|
// TODO: unroll to 32x2 or update caller/any to 16 pixels
|
||||||
|
void ARGBToUVMatrixRow_AVX2_MADDWD(const uint8_t* src_argb,
|
||||||
|
int src_stride_argb,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width,
|
||||||
|
const struct RgbUVConstants* rgbuvconstants) {
|
||||||
|
asm volatile(
|
||||||
|
"vmovdqa %7,%%ymm15 \n" // kShuffleAARRGGBB
|
||||||
|
"vpcmpeqb %%ymm14,%%ymm14,%%ymm14 \n" // 0x0101
|
||||||
|
"vpabsb %%ymm14,%%ymm14 \n"
|
||||||
|
"vpxor %%ymm13,%%ymm13,%%ymm13 \n" // 0 for vpavgw
|
||||||
|
"vpcmpeqw %%ymm12,%%ymm12,%%ymm12 \n" // -1
|
||||||
|
"vbroadcastf128 %5,%%ymm6 \n" // RGBToU
|
||||||
|
"vbroadcastf128 %6,%%ymm7 \n" // RGBToV
|
||||||
|
"vpcmpeqd %%ymm5,%%ymm5,%%ymm5 \n" // 0x00008000
|
||||||
|
"vpabsd %%ymm5,%%ymm5 \n"
|
||||||
|
"vpslld $15,%%ymm5,%%ymm5 \n"
|
||||||
|
"sub %1,%2 \n"
|
||||||
|
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu (%0),%%ymm8 \n" // Read 16x2 ARGB Pixels
|
||||||
|
"vmovdqu 0x20(%0),%%ymm9 \n"
|
||||||
|
"vmovdqu 0x00(%0,%4,1),%%ymm10 \n"
|
||||||
|
"vmovdqu 0x20(%0,%4,1),%%ymm11 \n"
|
||||||
|
"vpshufb %%ymm15,%%ymm8,%%ymm8 \n" // aarrggbb
|
||||||
|
"vpshufb %%ymm15,%%ymm9,%%ymm9 \n"
|
||||||
|
"vpshufb %%ymm15,%%ymm10,%%ymm10 \n"
|
||||||
|
"vpshufb %%ymm15,%%ymm11,%%ymm11 \n"
|
||||||
|
"vpmaddubsw %%ymm14,%%ymm8,%%ymm8 \n" // 16x2 -> 8x2
|
||||||
|
"vpmaddubsw %%ymm14,%%ymm9,%%ymm9 \n"
|
||||||
|
"vpmaddubsw %%ymm14,%%ymm10,%%ymm10 \n"
|
||||||
|
"vpmaddubsw %%ymm14,%%ymm11,%%ymm11 \n"
|
||||||
|
"vpaddw %%ymm8,%%ymm10,%%ymm8 \n" // 8x2 -> 8x1
|
||||||
|
"vpaddw %%ymm9,%%ymm11,%%ymm9 \n"
|
||||||
|
"vpsrlw $1,%%ymm8,%%ymm8 \n"
|
||||||
|
"vpsrlw $1,%%ymm9,%%ymm9 \n"
|
||||||
|
"vpavgw %%ymm13,%%ymm8,%%ymm8 \n"
|
||||||
|
"vpavgw %%ymm13,%%ymm9,%%ymm9 \n"
|
||||||
|
"vpackuswb %%ymm9,%%ymm8,%%ymm0 \n" // mutates
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n" // 8 ARGB Pixels
|
||||||
|
|
||||||
|
"vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" // 8 V
|
||||||
|
"vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" // 8 U
|
||||||
|
"vpmaddwd %%ymm12,%%ymm1,%%ymm1 \n" // negate + hadd
|
||||||
|
"vpmaddwd %%ymm12,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpaddd %%ymm1,%%ymm5,%%ymm1 \n" // +0x8000 = 0 to 0xFFFF
|
||||||
|
"vpaddd %%ymm0,%%ymm5,%%ymm0 \n"
|
||||||
|
"vpackusdw %%ymm1,%%ymm0,%%ymm0 \n" // mutates
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n" // uuuuuuuu vvvvvvvv
|
||||||
|
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" // mutates
|
||||||
|
"vmovq %%xmm0,(%1) \n" // Write 8 U's
|
||||||
|
"vextractf128 $0x1,%%ymm0,%%xmm0 \n" // Copy V to low 8 bytes
|
||||||
|
"vmovq %%xmm0,0x00(%1,%2,1) \n" // Write 8 V's
|
||||||
|
|
||||||
|
"lea 0x40(%0),%0 \n"
|
||||||
|
"lea 0x8(%1),%1 \n"
|
||||||
|
"subl $0x10,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src_argb), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(width) // %3
|
||||||
|
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||||
|
"m"(rgbuvconstants->kRGBToU), // %5
|
||||||
|
"m"(rgbuvconstants->kRGBToV), // %6
|
||||||
|
"m"(kShuffleAARRGGBB) // %7
|
||||||
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7", "xmm8",
|
||||||
|
"xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15");
|
||||||
|
}
|
||||||
#endif // HAS_ARGBTOUVROW_AVX2
|
#endif // HAS_ARGBTOUVROW_AVX2
|
||||||
|
|
||||||
|
#if defined(HAS_ARGBTOUV444ROW_SSSE3) || defined(HAS_ARGBTOUVROW_AVX2)
|
||||||
#ifdef HAS_ARGBTOUV444ROW_SSSE3
|
|
||||||
|
|
||||||
// RGB to BT601 coefficients
|
// RGB to BT601 coefficients
|
||||||
// UB 0.875 coefficient = 112
|
// UB 0.875 coefficient = 112
|
||||||
@ -1916,6 +1990,20 @@ static const struct RgbUVConstants kARGBI601UVConstants = {
|
|||||||
{-112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0},
|
{-112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0},
|
||||||
{18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0}};
|
{18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0}};
|
||||||
|
|
||||||
|
static const struct RgbUVConstants kABGRI601UVConstants = {
|
||||||
|
{38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0},
|
||||||
|
{-112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0}};
|
||||||
|
|
||||||
|
static const struct RgbUVConstants kBGRAI601UVConstants = {
|
||||||
|
{0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112},
|
||||||
|
{0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18}};
|
||||||
|
|
||||||
|
static const struct RgbUVConstants kRGBAI601UVConstants = {
|
||||||
|
{0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38},
|
||||||
|
{0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112}};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBTOUV444ROW_SSSE3
|
||||||
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
|
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1935,18 +2023,6 @@ void ARGBToUV444Row_AVX2(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOUV444ROW_AVX2
|
#endif // HAS_ARGBTOUV444ROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
|
||||||
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
|
||||||
int src_stride_argb,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width) {
|
|
||||||
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
|
|
||||||
&kARGBI601UVConstants);
|
|
||||||
}
|
|
||||||
#endif // HAS_ARGBTOUVROW_AVX2
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||||
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
|
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
@ -1957,10 +2033,6 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
|
|||||||
&kARGBI601UVConstants);
|
&kARGBI601UVConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct RgbUVConstants kABGRI601UVConstants = {
|
|
||||||
{38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0},
|
|
||||||
{-112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0}};
|
|
||||||
|
|
||||||
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
|
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
|
||||||
int src_stride_abgr,
|
int src_stride_abgr,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -1970,10 +2042,6 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
|
|||||||
&kABGRI601UVConstants);
|
&kABGRI601UVConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct RgbUVConstants kBGRAI601UVConstants = {
|
|
||||||
{0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112, 0, 38, 74, -112},
|
|
||||||
{0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18, 0, -112, 94, 18}};
|
|
||||||
|
|
||||||
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
|
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
|
||||||
int src_stride_bgra,
|
int src_stride_bgra,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -1983,10 +2051,6 @@ void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
|
|||||||
&kBGRAI601UVConstants);
|
&kBGRAI601UVConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct RgbUVConstants kRGBAI601UVConstants = {
|
|
||||||
{0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38, 0, -112, 74, 38},
|
|
||||||
{0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112, 0, 18, 94, -112}};
|
|
||||||
|
|
||||||
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||||
int src_stride_rgba,
|
int src_stride_rgba,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -1997,6 +2061,26 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||||
|
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
||||||
|
int src_stride_argb,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
|
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||||
|
&kARGBI601UVConstants);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
|
||||||
|
int src_stride_abgr,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
|
ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||||
|
&kABGRI601UVConstants);
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBTOUVROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVJ444ROW_SSSE3
|
#ifdef HAS_ARGBTOUVJ444ROW_SSSE3
|
||||||
// RGB to JPEG coefficients
|
// RGB to JPEG coefficients
|
||||||
// UB 0.500 coefficient = 128
|
// UB 0.500 coefficient = 128
|
||||||
@ -2043,7 +2127,9 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
|
|||||||
ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_SSSE3(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||||
&kARGBJPEGUVConstants);
|
&kARGBJPEGUVConstants);
|
||||||
}
|
}
|
||||||
|
#endif // HAS_ARGBTOUVJROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_ABGRTOUVJROW_SSSE3
|
||||||
void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
|
void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
|
||||||
int src_stride_abgr,
|
int src_stride_abgr,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -2054,6 +2140,28 @@ void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ABGRTOUVJROW_SSSE3
|
#endif // HAS_ABGRTOUVJROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBTOUVJROW_AVX2
|
||||||
|
void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
|
||||||
|
int src_stride_argb,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
|
ARGBToUVMatrixRow_AVX2(src_argb, src_stride_argb, dst_u, dst_v, width,
|
||||||
|
&kARGBJPEGUVConstants);
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBTOUVJROW_AVX2
|
||||||
|
|
||||||
|
#ifdef HAS_ABGRTOUVJROW_AVX2
|
||||||
|
void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
|
||||||
|
int src_stride_abgr,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
|
ARGBToUVMatrixRow_AVX2(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||||
|
&kABGRJPEGUVConstants);
|
||||||
|
}
|
||||||
|
#endif // HAS_ABGRTOUVJROW_AVX2
|
||||||
|
|
||||||
void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movdqa %3,%%xmm4 \n"
|
"movdqa %3,%%xmm4 \n"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user