mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBToUV allow 32 bit x86 build
- make width loop count on stack
- set YMM constants in its own asm block
- make struct for shuffle and add constants
- disable clang format on row_neon.cc function

Bug: 413781394
Change-Id: I263f6862cb7589dc31ac65d118f7ebeb65dbb24a
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6495259
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
1e40e34573
commit
9f9b5cf660
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||
Version: 1908
|
||||
Version: 1909
|
||||
License: BSD-3-Clause
|
||||
License File: LICENSE
|
||||
Shipped: yes
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1908
|
||||
#define LIBYUV_VERSION 1909
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -1642,12 +1642,16 @@ void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x10,%3 \n"
|
||||
"subl $0x10,%3 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
#if defined(__i386__)
|
||||
"+m"(width) // %3
|
||||
#else
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "m"(rgbuvconstants->kRGBToU), // %4
|
||||
"m"(rgbuvconstants->kRGBToV), // %5
|
||||
"m"(kAddUV128) // %6
|
||||
@ -1708,74 +1712,6 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
||||
"vmovdqu %%ymm0,(%1,%2,1) \n"
|
||||
"lea 0x80(%0),%0 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x20,%3 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+rm"(width) // %3
|
||||
: "m"(rgbuvconstants->kRGBToU), // %4
|
||||
"m"(rgbuvconstants->kRGBToV), // %5
|
||||
"m"(kAddUV128), // %6
|
||||
"m"(kPermdARGBToY_AVX) // %7
|
||||
: "memory", "cc", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6",
|
||||
"ymm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOUV444ROW_AVX2
|
||||
|
||||
// vpshufb control for vphaddw + vpackuswb results packed to shorts.
// The same 16-byte pattern is given twice, once per 128-bit lane. Within a
// lane it alternates 2-byte units taken from the low 8 bytes (0,1 / 2,3 / ...)
// and from the high 8 bytes (8,9 / 10,11 / ...).
|
||||
static const lvec8 kShufARGBToUV_AVX = {
|
||||
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
||||
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
|
||||
|
||||
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
asm volatile(
|
||||
"vbroadcastf128 %5,%%ymm5 \n"
|
||||
"vbroadcastf128 %6,%%ymm6 \n"
|
||||
"vbroadcastf128 %7,%%ymm7 \n"
|
||||
"sub %1,%2 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n"
|
||||
"vmovdqu 0x40(%0),%%ymm2 \n"
|
||||
"vmovdqu 0x60(%0),%%ymm3 \n"
|
||||
"vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
|
||||
"vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
|
||||
"vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
|
||||
"vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
|
||||
"lea 0x80(%0),%0 \n"
|
||||
"vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
|
||||
"vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
|
||||
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
|
||||
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
|
||||
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
|
||||
"vpmaddubsw %%ymm6,%%ymm0,%%ymm1 \n"
|
||||
"vpmaddubsw %%ymm6,%%ymm2,%%ymm3 \n"
|
||||
"vpmaddubsw %%ymm7,%%ymm0,%%ymm0 \n"
|
||||
"vpmaddubsw %%ymm7,%%ymm2,%%ymm2 \n"
|
||||
"vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||
"vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpsubw %%ymm0,%%ymm5,%%ymm0 \n"
|
||||
"vpsubw %%ymm1,%%ymm5,%%ymm1 \n"
|
||||
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
|
||||
"vpackuswb %%ymm0,%%ymm1,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpshufb %8,%%ymm0,%%ymm0 \n"
|
||||
|
||||
"vextractf128 $0x0,%%ymm0,(%1) \n"
|
||||
"vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"subl $0x20,%3 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
@ -1787,28 +1723,31 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||
#else
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"m"(kAddUV128), // %5
|
||||
"m"(rgbuvconstants->kRGBToU), // %6
|
||||
"m"(rgbuvconstants->kRGBToV), // %7
|
||||
"m"(kShufARGBToUV_AVX) // %8
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "m"(rgbuvconstants->kRGBToU), // %4
|
||||
"m"(rgbuvconstants->kRGBToV), // %5
|
||||
"m"(kAddUV128), // %6
|
||||
"m"(kPermdARGBToY_AVX) // %7
|
||||
: "memory", "cc", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6",
|
||||
"ymm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOUV444ROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||
|
||||
void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
void OMITFP ARGBToUVMatrixRow_SSSE3(
|
||||
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
||||
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
|
||||
asm volatile(
|
||||
"movdqa %5,%%xmm3 \n"
|
||||
"movdqa %6,%%xmm4 \n"
|
||||
"movdqa %7,%%xmm5 \n"
|
||||
"sub %1,%2 \n"
|
||||
"movdqa %0,%%xmm3 \n"
|
||||
"movdqa %1,%%xmm4 \n"
|
||||
"movdqa %2,%%xmm5 \n"
|
||||
:
|
||||
: "m"(rgbuvconstants->kRGBToU), // %0
|
||||
"m"(rgbuvconstants->kRGBToV), // %1
|
||||
"m"(kAddUV128) // %2
|
||||
: "xmm3", "xmm4", "xmm5");
|
||||
|
||||
asm volatile("sub %1,%2 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -1862,16 +1801,93 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||
#else
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"m"(rgbuvconstants->kRGBToU), // %5
|
||||
"m"(rgbuvconstants->kRGBToV), // %6
|
||||
"m"(kAddUV128) // %7
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "r"((intptr_t)(src_stride_argb)) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
|
||||
"xmm6", "xmm7");
|
||||
}
|
||||
|
||||
#endif // HAS_ARGBTOUVROW_SSSE3
|
||||
|
||||
// Constant block for ARGBToUVMatrixRow_AVX2. That function passes the address
// of an instance as a single register operand and reads both members relative
// to it: the shuffle at offset 0 and the bias at byte offset 32.
|
||||
// Coefficients expressed as negatives to allow 128: the row function
// subtracts the weighted sums from kAddUV128 (vpsubw) rather than adding.
|
||||
struct UVMatrixConstants {
|
||||
lvec8 kShufARGBToUV;  // vpshufb control for vphaddw + vpackuswb packed to shorts.
|
||||
ulvec8 kAddUV128;  // Bias; the asm assumes the preceding member is 32 bytes.
|
||||
};
|
||||
|
||||
// Instance of UVMatrixConstants read by ARGBToUVMatrixRow_AVX2.
// The initializer is flat (no nested braces): the first two rows fill
// kShufARGBToUV, the last two rows fill kAddUV128.
static const UVMatrixConstants kShufARGBToUV_AVX = {
|
||||
// kShufARGBToUV: per 128-bit lane, alternate 2-byte units from the low and
// high 8 bytes of the lane.
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
||||
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
||||
// kAddUV128: byte pairs 0,128, i.e. 0x8000 in every little-endian 16-bit
// word (128 in the high byte, which is what survives the later >> 8).
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
|
||||
0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128};
|
||||
|
||||
// Converts two adjacent rows of 4-byte pixels into one row of U and one row of
// V bytes, averaging each 2x2 pixel group first.
//
// src_argb        - first source row.
// src_stride_argb - byte offset from src_argb to the second source row.
// dst_u, dst_v    - output rows; 16 bytes are written to each per iteration.
// width           - pixel count; 32 pixels (128 source bytes) are consumed per
//                   iteration and the loop repeats while the remaining count
//                   is > 0, so the body always runs at least once.
//                   NOTE(review): presumably callers pass a multiple of 32 or
//                   tolerate the overrun - confirm against the call sites.
// rgbuvconstants  - supplies the kRGBToU / kRGBToV coefficient vectors.
void OMITFP ARGBToUVMatrixRow_AVX2(
|
||||
const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
||||
uint8_t* dst_v, int width, const struct RgbUVConstants* rgbuvconstants) {
|
||||
// First asm statement: broadcast the 128-bit U and V coefficient vectors into
// both lanes of ymm6 / ymm7. The second asm statement below reads them.
// NOTE(review): this statement writes ymm6/ymm7 but declares no clobbers,
// unlike the SSSE3 variant's setup block which lists xmm3-xmm5. The values are
// also expected to survive until the next asm statement - confirm this is
// intentional and safe for all supported compilers.
asm volatile(
|
||||
"vbroadcastf128 %0,%%ymm6 \n"
|
||||
"vbroadcastf128 %1,%%ymm7 \n"
|
||||
:
|
||||
: "m"(rgbuvconstants->kRGBToU), // %0
|
||||
"m"(rgbuvconstants->kRGBToV) // %1
|
||||
:);
|
||||
|
||||
asm volatile(
|
||||
// ymm5 = 32 bytes at offset 32 of kShufARGBToUV_AVX (the kAddUV128 member,
// given a 32-byte first member). vmovdqa is an aligned load.
"vmovdqa 32(%5),%%ymm5 \n"
|
||||
// Turn dst_v into an offset from dst_u so only dst_u needs advancing.
"sub %1,%2 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
// Load 128 bytes (32 pixels) of the first row.
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n"
|
||||
"vmovdqu 0x40(%0),%%ymm2 \n"
|
||||
"vmovdqu 0x60(%0),%%ymm3 \n"
|
||||
// Vertical average with the row at src_argb + src_stride_argb.
"vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
|
||||
"vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
|
||||
"vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
|
||||
"vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
|
||||
"lea 0x80(%0),%0 \n"
|
||||
// Horizontal average: 0x88 gathers the even-indexed 32-bit elements, 0xdd the
// odd-indexed ones; vpavgb then averages each neighbouring pair.
"vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
|
||||
"vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
|
||||
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
|
||||
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
|
||||
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
|
||||
// Multiply-add against the U coefficients (ymm6 -> ymm1, ymm3) and the V
// coefficients (ymm7 -> ymm0, ymm2), then sum adjacent 16-bit pairs.
"vpmaddubsw %%ymm6,%%ymm0,%%ymm1 \n"
|
||||
"vpmaddubsw %%ymm6,%%ymm2,%%ymm3 \n"
|
||||
"vpmaddubsw %%ymm7,%%ymm0,%%ymm0 \n"
|
||||
"vpmaddubsw %%ymm7,%%ymm2,%%ymm2 \n"
|
||||
"vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||
"vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
// result = bias (ymm5) - weighted sum, then keep the high byte of each word.
"vpsubw %%ymm0,%%ymm5,%%ymm0 \n"
|
||||
"vpsubw %%ymm1,%%ymm5,%%ymm1 \n"
|
||||
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
|
||||
// Pack words to bytes, then reorder (qword permute + byte shuffle read from
// offset 0 of kShufARGBToUV_AVX) so the low 128 bits hold the bytes for dst_u
// and the high 128 bits the bytes for dst_v.
"vpackuswb %%ymm0,%%ymm1,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpshufb (%5),%%ymm0,%%ymm0 \n"
|
||||
|
||||
// Store 16 bytes at dst_u and 16 bytes at dst_u + (dst_v - dst_u).
"vextractf128 $0x0,%%ymm0,(%1) \n"
|
||||
"vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
// width -= 32; loop while the remaining count is positive.
"subl $0x20,%3 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
// On 32-bit x86 the loop counter is forced to memory ("+m") instead of being
// allowed a register, leaving registers for the pointer operands.
#if defined(__i386__)
|
||||
"+m"(width) // %3
|
||||
#else
|
||||
"+rm"(width) // %3
|
||||
#endif
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"r"(&kShufARGBToUV_AVX) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOUV444ROW_SSSE3
|
||||
|
||||
// RGB to BT601 coefficients
|
||||
|
||||
@ -267,9 +267,12 @@ void I422ToRGBARow_NEON(const uint8_t* src_y,
|
||||
YUVTORGB_SETUP
|
||||
"vmov.u8 d6, #255 \n"
|
||||
"1: \n" //
|
||||
READYUV422
|
||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||
STORERGBA "bgt 1b \n"
|
||||
READYUV422 //
|
||||
"subs %[width], %[width], #8 \n" //
|
||||
YUVTORGB //
|
||||
RGBTORGB8 //
|
||||
STORERGBA //
|
||||
"bgt 1b \n"
|
||||
: [src_y] "+r"(src_y), // %[src_y]
|
||||
[src_u] "+r"(src_u), // %[src_u]
|
||||
[src_v] "+r"(src_v), // %[src_v]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user