mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Reimplement NV21ToARGB to allow different color matrix.
The low-level row function for NV21ToARGB is rewritten to accept the same YUV matrix (yuvconstants) used by the other YUV to ARGB functions. Previously NV21 was implemented for Windows by reusing the NV12 row function with a different matrix that swapped the U and V contributions. However, the Arm version of the low-level function does not allow the U and V contributions of the matrix to be swapped. Using a new low-level function that reads NV21 directly and uses the same yuvconstants as the other YUV conversion functions makes an Arm port of this function possible. TBR=harryjin@google.com BUG=libyuv:500 Review URL: https://codereview.chromium.org/1388273002 .
This commit is contained in:
parent
68fa59c873
commit
914a9856c7
@ -145,13 +145,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height);
|
||||
|
||||
// Convert NV21 to RGB565.
|
||||
LIBYUV_API
|
||||
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height);
|
||||
|
||||
// I422ToARGB is in convert_argb.h
|
||||
// Convert I422 to BGRA.
|
||||
LIBYUV_API
|
||||
|
||||
@ -126,6 +126,7 @@ extern "C" {
|
||||
#define HAS_MIRRORUVROW_SSSE3
|
||||
#define HAS_NV12TOARGBROW_SSSE3
|
||||
#define HAS_NV12TORGB565ROW_SSSE3
|
||||
#define HAS_NV21TOARGBROW_SSSE3
|
||||
#define HAS_RAWTOARGBROW_SSSE3
|
||||
#define HAS_RAWTOYROW_SSSE3
|
||||
#define HAS_RGB24TOARGBROW_SSSE3
|
||||
@ -249,6 +250,7 @@ extern "C" {
|
||||
#define HAS_YUY2TOUVROW_AVX2
|
||||
#define HAS_YUY2TOYROW_AVX2
|
||||
#define HAS_NV12TOARGBROW_AVX2
|
||||
#define HAS_NV21TOARGBROW_AVX2
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#define HAS_I422ALPHATOABGRROW_AVX2
|
||||
|
||||
@ -312,6 +314,7 @@ extern "C" {
|
||||
#define HAS_MIRRORUVROW_NEON
|
||||
#define HAS_NV12TOARGBROW_NEON
|
||||
#define HAS_NV12TORGB565ROW_NEON
|
||||
#define HAS_NV21TOARGBROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
#define HAS_RAWTOUVROW_NEON
|
||||
#define HAS_RAWTOYROW_NEON
|
||||
@ -632,6 +635,11 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
uint8* dst_rgb565,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_vu,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
@ -1075,6 +1083,11 @@ void NV12ToRGB565Row_C(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void YUY2ToARGBRow_C(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
@ -1293,6 +1306,16 @@ void NV12ToRGB565Row_AVX2(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_AVX2(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
@ -1491,6 +1514,16 @@ void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_vu,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
|
||||
const uint8* src_vu,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
@ -1756,6 +1789,11 @@ void NV12ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_vu,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
|
||||
@ -24,11 +24,12 @@ extern "C" {
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
// GCC >= 4.7.0 required for AVX2.
|
||||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
|
||||
#define GCC_HAS_AVX2 1
|
||||
#endif // GNUC >= 4.7
|
||||
#endif // __GNUC__
|
||||
|
||||
// clang >= 3.4.0 required for AVX2.
|
||||
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
||||
@ -37,6 +38,12 @@ extern "C" {
|
||||
#endif // clang >= 3.4
|
||||
#endif // __clang__
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
|
||||
// The following are available on all x86 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||
@ -56,10 +63,17 @@ extern "C" {
|
||||
#define HAS_SCALEADDROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms, but
|
||||
// require VS2012, clang 3.4 or gcc 4.7.
|
||||
// The code supports NaCL but requires a new compiler and validator.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
|
||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
#define HAS_SCALEADDROW_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#define HAS_SCALEADDROW_AVX2
|
||||
#define HAS_SCALEROWDOWN2_AVX2
|
||||
#define HAS_SCALEROWDOWN4_AVX2
|
||||
#endif
|
||||
|
||||
@ -1093,11 +1093,11 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV12ToARGBRow)(const uint8* y_buf,
|
||||
void (*NV21ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToARGBRow_C;
|
||||
int width) = NV21ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1108,33 +1108,33 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_NV12TOARGBROW_SSSE3)
|
||||
#if defined(HAS_NV21TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
|
||||
NV21ToARGBRow = NV21ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TOARGBROW_AVX2)
|
||||
#if defined(HAS_NV21TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_AVX2;
|
||||
NV21ToARGBRow = NV21ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TOARGBROW_NEON)
|
||||
#if defined(HAS_NV21TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_NEON;
|
||||
NV21ToARGBRow = NV21ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width);
|
||||
NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
|
||||
@ -1039,64 +1039,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert NV21 to RGB565.
|
||||
LIBYUV_API
|
||||
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV12ToRGB565Row)(const uint8* y_buf,
|
||||
const uint8* src_vu,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToRGB565Row_C;
|
||||
if (!src_y || !src_vu || !dst_rgb565 ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
|
||||
dst_stride_rgb565 = -dst_stride_rgb565;
|
||||
}
|
||||
#if defined(HAS_NV12TORGB565ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TORGB565ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TORGB565ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width);
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_vu += src_stride_vu;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void SetPlane(uint8* dst_y, int dst_stride_y,
|
||||
int width, int height,
|
||||
|
||||
@ -280,6 +280,15 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#ifdef HAS_NV12TOARGBROW_NEON
|
||||
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_SSSE3
|
||||
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_AVX2
|
||||
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_NEON
|
||||
// The NV21 wrapper must be paired with NV21ToARGBRow_NEON (the row function
// built on READNV21), not with the NV12 row function.
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_SSSE3
|
||||
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
|
||||
@ -1663,6 +1663,30 @@ void NV12ToARGBRow_C(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void NV21ToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_vu,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(src_y[0], src_vu[1], src_vu[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
YuvPixel(src_y[1], src_vu[1], src_vu[0],
|
||||
rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
|
||||
rgb_buf[7] = 255;
|
||||
src_y += 2;
|
||||
src_vu += 2;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel(src_y[0], src_vu[1], src_vu[0],
|
||||
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
|
||||
rgb_buf[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
void NV12ToRGB565Row_C(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
|
||||
@ -164,6 +164,12 @@ static const lvec8 kShuffleUYVYUV = {
|
||||
0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14,
|
||||
0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
|
||||
};
|
||||
|
||||
// NV21 shuf 8 VU to 16 UV.
// Byte-shuffle control used by READNV21 / READNV21_AVX2. Each group of four
// control bytes (2k+1, 2k, 2k+1, 2k) picks source byte pair k in swapped
// order and repeats it, so a V,U input pair becomes U,V,U,V. The same 16
// control bytes are listed twice (32 entries in total).
|
||||
static const lvec8 kShuffleNV21 = {
|
||||
1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
|
||||
1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
|
||||
};
|
||||
#endif // HAS_RGB24TOARGBROW_SSSE3
|
||||
|
||||
#ifdef HAS_J400TOARGBROW_SSE2
|
||||
@ -1398,6 +1404,15 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
||||
"punpcklbw %%xmm4,%%xmm4 \n" \
|
||||
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 4 VU from NV21, upsample to 8 UV.
// Loads 8 bytes of interleaved VU into xmm0 and reorders them with
// kShuffleNV21, loads 8 Y bytes into xmm4 and interleaves them with
// themselves, and advances vu_buf and y_buf by 8 bytes each.
|
||||
#define READNV21 \
|
||||
"movq " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
|
||||
"lea " MEMLEA(0x8, [vu_buf]) ",%[vu_buf] \n" \
|
||||
"pshufb %[kShuffleNV21], %%xmm0 \n" \
|
||||
"movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||
"punpcklbw %%xmm4,%%xmm4 \n" \
|
||||
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 4 YUY2 with 8 Y and update 4 UV to 8 UV.
|
||||
#define READYUY2 \
|
||||
"movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \
|
||||
@ -1769,6 +1784,31 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// 8 pixels per loop iteration.
// Reads Y and interleaved VU with READNV21, converts with the matrix passed
// in yuvconstants, stores ARGB, and repeats until width has been reduced to
// zero or below (width is decremented by 8 each time round).
void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// xmm5 = all ones; presumably the opaque alpha consumed by STOREARGB.
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[vu_buf]"+r"(vu_buf), // %[vu_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
|
||||
// Shuffle table is passed as a memory operand for READNV21's pshufb.
[kShuffleNV21]"m"(kShuffleNV21)
|
||||
// Does not use r14.
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
@ -1940,6 +1980,17 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
|
||||
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 8 VU from NV21, upsample to 16 UV.
// Loads 16 bytes of interleaved VU into xmm0, spreads the two 8-byte halves
// across the ymm0 lanes with vpermq and reorders them with kShuffleNV21.
// Loads 16 Y bytes into xmm4, spreads them the same way and interleaves them
// with themselves. Advances vu_buf and y_buf by 16 bytes each.
|
||||
#define READNV21_AVX2 \
|
||||
"vmovdqu " MEMACCESS([vu_buf]) ",%%xmm0 \n" \
|
||||
"lea " MEMLEA(0x10, [vu_buf]) ",%[vu_buf] \n" \
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
|
||||
"vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \
|
||||
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
||||
"vpermq $0xd8,%%ymm4,%%ymm4 \n" \
|
||||
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
|
||||
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
|
||||
|
||||
// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
|
||||
#define READYUY2_AVX2 \
|
||||
"vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \
|
||||
@ -2251,8 +2302,37 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_YUY2TOARGBROW_AVX2
|
||||
#endif // HAS_NV12TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_NV21TOARGBROW_AVX2)
|
||||
// 16 pixels.
|
||||
// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
// The loop repeats until width, decremented by 16 per iteration, reaches zero
// or below; vzeroupper is issued once after the loop.
|
||||
void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// ymm5 = all ones; presumably the opaque alpha consumed by STOREARGB_AVX2.
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READNV21_AVX2
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
STOREARGB_AVX2
|
||||
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[vu_buf]"+r"(vu_buf), // %[vu_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
|
||||
// Shuffle table is passed as a memory operand for READNV21_AVX2's vpshufb.
[kShuffleNV21]"m"(kShuffleNV21)
|
||||
// Does not use r14.
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_NV21TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_YUY2TOARGBROW_AVX2)
|
||||
// 16 pixels.
|
||||
|
||||
@ -579,6 +579,34 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
// Converts a row of NV21 (Y row plus interleaved VU chroma row) to ARGB with
// 32-bit NEON, 8 pixels per loop iteration. The conversion constants are
// taken from the yuvconstants argument.
void NV21ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_vu,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUVTORGB
|
||||
// Count down 8 pixels; the flags set here drive the bgt below.
"subs %3, %3, #8 \n"
|
||||
// d23 = 255 in every byte: the fourth (alpha) channel of each pixel.
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(2)
|
||||
// Store d20..d23 interleaved (32 bytes) and post-increment dst_argb.
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_vu), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
|
||||
@ -576,6 +576,34 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
|
||||
}
|
||||
#endif // HAS_NV12TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_NV21TOARGBROW_NEON
// Converts a row of NV21 (Y row plus interleaved VU chroma row) to ARGB with
// AArch64 NEON, 8 pixels per loop iteration. Each iteration stores 32 bytes
// (v20..v23 interleaved, with v23 set to 255) and decrements width by 8.
// Guarded by HAS_NV21TOARGBROW_NEON, the same macro the dispatcher and the
// "Any" wrapper test before referencing this function.
// NOTE(review): the bias and Y constants are taken from the global
// kYuvConstants rather than from the yuvconstants argument, so the argument
// is not used here - confirm whether that is intended for this port.
void NV21ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_vu,
                        uint8* dst_argb,
                        struct YuvConstants* yuvconstants,
                        int width) {
  asm volatile (
    YUVTORGB_SETUP
  "1: \n"
    READNV21
    YUVTORGB(v22, v21, v20)
    // Count down 8 pixels; the flags set here drive the b.gt below.
    "subs %w3, %w3, #8 \n"
    // v23 = 255 in every byte: the fourth (alpha) channel of each pixel.
    "movi v23.8b, #255 \n"
    MEMACCESS(2)
    "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
    "b.gt 1b \n"
  : "+r"(src_y),     // %0
    "+r"(src_vu),    // %1
    "+r"(dst_argb),  // %2
    "+r"(width)      // %3
  : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
    [kYToRgb]"r"(&kYuvConstants.kYToRgb)
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
    "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
  );
}
#endif  // HAS_NV21TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_NV12TORGB565ROW_NEON
|
||||
void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
|
||||
@ -319,6 +319,12 @@ static const lvec8 kShuffleUYVYUV = {
|
||||
0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
|
||||
};
|
||||
|
||||
// NV21 shuf 8 VU to 16 UV.
// Byte-shuffle control used by READNV21 / READNV21_AVX2. Each group of four
// control bytes (2k+1, 2k, 2k+1, 2k) picks source byte pair k in swapped
// order and repeats it, so a V,U input pair becomes U,V,U,V. The same 16
// control bytes are listed twice (32 entries in total).
|
||||
static const lvec8 kShuffleNV21 = {
|
||||
1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
|
||||
1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
|
||||
};
|
||||
|
||||
// Duplicates gray value 3 times and fills in alpha opaque.
|
||||
__declspec(naked)
|
||||
void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
||||
@ -1992,6 +1998,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
__asm lea eax, [eax + 16] \
|
||||
}
|
||||
|
||||
// Read 8 VU from NV21, upsample to 16 UV.
// Expects the VU pointer in esi and the Y pointer in eax; both are advanced
// by 16 bytes. Results are left in ymm0 (chroma) and ymm4 (luma).
|
||||
#define READNV21_AVX2 __asm { \
|
||||
__asm vmovdqu xmm0, [esi] /* VU */ \
|
||||
__asm lea esi, [esi + 16] \
|
||||
__asm vpermq ymm0, ymm0, 0xd8 \
|
||||
__asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \
|
||||
__asm vmovdqu xmm4, [eax] /* Y */ \
|
||||
__asm vpermq ymm4, ymm4, 0xd8 \
|
||||
__asm vpunpcklbw ymm4, ymm4, ymm4 \
|
||||
__asm lea eax, [eax + 16] \
|
||||
}
|
||||
|
||||
// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
|
||||
#define READYUY2_AVX2 __asm { \
|
||||
__asm vmovdqu ymm4, [eax] /* YUY2 */ \
|
||||
@ -2365,6 +2383,41 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_NV12TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_NV21TOARGBROW_AVX2
|
||||
// 16 pixels.
|
||||
// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
// Naked function: arguments are read straight from the stack. The loop
// repeats until width, decremented by 16 per iteration, reaches zero or below.
|
||||
__declspec(naked)
|
||||
void NV21ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
// Save the two registers used below; the "8" in the argument offsets
// accounts for these two pushes.
push esi
|
||||
push ebx
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // VU
|
||||
mov edx, [esp + 8 + 12] // argb
|
||||
mov ebx, [esp + 8 + 16] // yuvconstants
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READNV21_AVX2
|
||||
YUVTORGB_AVX2(ebx)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
// Restore in reverse order of the pushes above.
pop ebx
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_NV21TOARGBROW_AVX2
|
||||
|
||||
// 16 pixels.
|
||||
// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
@ -2608,6 +2661,16 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
||||
__asm lea eax, [eax + 8] \
|
||||
}
|
||||
|
||||
// Read 4 VU from NV21, upsample to 8 UV.
// Expects the VU pointer in esi and the Y pointer in eax; both are advanced
// by 8 bytes. Results are left in xmm0 (chroma) and xmm4 (luma).
|
||||
#define READNV21 __asm { \
|
||||
__asm movq xmm0, qword ptr [esi] /* VU */ \
|
||||
__asm lea esi, [esi + 8] \
|
||||
__asm pshufb xmm0, xmmword ptr kShuffleNV21 \
|
||||
__asm movq xmm4, qword ptr [eax] \
|
||||
__asm punpcklbw xmm4, xmm4 \
|
||||
__asm lea eax, [eax + 8] \
|
||||
}
|
||||
|
||||
// Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
|
||||
#define READYUY2 __asm { \
|
||||
__asm movdqu xmm4, [eax] /* YUY2 */ \
|
||||
@ -3152,6 +3215,38 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 VU values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Naked function: arguments are read straight from the stack. The loop
// repeats until width, decremented by 8 per iteration, reaches zero or below.
|
||||
__declspec(naked)
|
||||
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* vu_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
// Save the two registers used below; the "8" in the argument offsets
// accounts for these two pushes.
push esi
|
||||
push ebx
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // VU
|
||||
mov edx, [esp + 8 + 12] // argb
|
||||
mov ebx, [esp + 8 + 16] // yuvconstants
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READNV21
|
||||
YUVTORGB(ebx)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
// Restore in reverse order of the pushes above.
pop ebx
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
@ -608,12 +609,12 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
// Reads 32 bytes and accumulates to 32 shorts at a time.
|
||||
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
asm volatile (
|
||||
"vpxor %%xmm5,%%xmm5 \n"
|
||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 16
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32
|
||||
"vpermq $0xd8,%%ymm3,%%ymm3 \n"
|
||||
"vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
|
||||
"vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
|
||||
|
||||
@ -671,7 +671,6 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
|
||||
|
||||
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
W1280, DIFF, N, NEG, OFF) \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user