RGB24Mirror function

Bug: b/151960427
Change-Id: I413db0011a4ed87eefc0dd166bb8e076b5aa4b1d
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2116639
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2020-03-24 12:07:52 -07:00 committed by Commit Bot
parent 7f00d67d7c
commit aabcc477bd
16 changed files with 1614 additions and 1529 deletions

View File

@ -313,6 +313,18 @@ int ARGBMirror(const uint8_t* src_argb,
int width,
int height);
// Alias
#define RGB24ToRGB24Mirror RGB24Mirror
// RGB24 mirror.
// Copies an RGB24 image (3 bytes per pixel) from src_rgb24 to dst_rgb24 with
// the pixel order of every row reversed (left <-> right).
//   src_stride_rgb24 / dst_stride_rgb24: byte offset between consecutive rows.
//   width:  pixels per row; must be > 0.
//   height: number of rows; must be non-zero. A negative height means the
//           source rows are read bottom-up, i.e. the image is also inverted
//           vertically.
// Returns 0 on success, or -1 if either pointer is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int RGB24Mirror(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Mirror a plane of data.
LIBYUV_API
void MirrorPlane(const uint8_t* src_y,

View File

@ -83,6 +83,20 @@ int NV12ToI420Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
// Rotate NV12 input and store in NV12.
LIBYUV_API
int NV12Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height,
enum RotationMode mode);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8_t* src,

View File

@ -279,6 +279,7 @@ extern "C" {
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGERGBROW_SSSE3
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
#define HAS_RGBATOYJROW_SSSE3
#define HAS_SPLITRGBROW_SSSE3
#define HAS_SWAPUVROW_SSSE3
@ -408,6 +409,7 @@ extern "C" {
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_RGB24MIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
@ -1196,7 +1198,9 @@ void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void RAWToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@ -1608,6 +1612,16 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void RGB24MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void RGB24MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void SplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,

View File

@ -86,10 +86,14 @@ enum FourCC {
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J422 = FOURCC('J', '4', '2', '2'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J444 = FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J420 =
FOURCC('J', '4', '2', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J422 =
FOURCC('J', '4', '2', '2'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J444 =
FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J400 =
FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc
FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc
FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc

View File

@ -1599,8 +1599,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
int height) {
int y;
#if (defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI)
defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI)
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVJRow_C;
@ -1748,8 +1747,7 @@ int RAWToI420(const uint8_t* src_raw,
int height) {
int y;
#if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
defined(HAS_RAWTOYROW_MSA) || \
defined(HAS_RAWTOYROW_MMI)
defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI)
void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
uint8_t* dst_v, int width) = RAWToUVRow_C;
void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =

View File

@ -1270,9 +1270,6 @@ static int I210ToARGBMatrix(const uint16_t* src_y,
return 0;
}
// Convert I210 to ARGB.
LIBYUV_API
int I210ToARGB(const uint16_t* src_y,

View File

@ -1132,7 +1132,7 @@ int ARGBMirror(const uint8_t* src_argb,
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
if (IS_ALIGNED(width, 16)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
}
@ -1179,6 +1179,52 @@ int ARGBMirror(const uint8_t* src_argb,
return 0;
}
// RGB24 mirror.
LIBYUV_API
int RGB24Mirror(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
int y;
void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
RGB24MirrorRow_C;
if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
#if defined(HAS_RGB24MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
RGB24MirrorRow = RGB24MirrorRow_NEON;
}
}
#endif
#if defined(HAS_RGB24MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24MirrorRow = RGB24MirrorRow_SSSE3;
}
}
#endif
// Mirror plane
for (y = 0; y < height; ++y) {
RGB24MirrorRow(src_rgb24, dst_rgb24, width);
src_rgb24 += src_stride_rgb24;
dst_rgb24 += dst_stride_rgb24;
}
return 0;
}
// Get a blender that is optimized for the CPU and pixel count.
// As there are 6 blenders to choose from, the caller should try to use
// the same blend function for all pixels if possible.

View File

@ -126,7 +126,7 @@ static int ARGBRotate180(const uint8_t* src_argb,
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
if (IS_ALIGNED(width, 16)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
}

View File

@ -1189,7 +1189,7 @@ ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 15)
#endif
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
@ -1197,6 +1197,12 @@ ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#ifdef HAS_ARGBMIRRORROW_MMI
ANY11M(ARGBMirrorRow_Any_MMI, ARGBMirrorRow_MMI, 4, 1)
#endif
#ifdef HAS_RGB24MIRRORROW_SSSE3
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#endif
#ifdef HAS_RGB24MIRRORROW_NEON
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
#endif
#undef ANY11M
// Any 1 plane. (memset)

View File

@ -2201,6 +2201,22 @@ void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
}
}
// Portable C row mirror for RGB24: writes the `width` 3-byte pixels of
// src_rgb24 to dst_rgb24 in reverse pixel order. The byte order inside each
// pixel is preserved.
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
                      int width) {
  // Read cursor starts on the last source pixel and walks backwards.
  const uint8_t* in = src_rgb24 + (width * 3 - 3);
  uint8_t* out = dst_rgb24;
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Load the whole pixel before storing any of it.
    const uint8_t c0 = in[0];
    const uint8_t c1 = in[1];
    const uint8_t c2 = in[2];
    out[0] = c0;
    out[1] = c1;
    out[2] = c2;
    in -= 3;
    out += 3;
  }
}
void SplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
@ -3427,8 +3443,8 @@ void GaussCol_C(const uint16_t* src0,
void GaussRow_F32_C(const float* src, float* dst, int width) {
int i;
for (i = 0; i < width; ++i) {
*dst++ =
(src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) * (1.0f / 256.0f);
*dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
(1.0f / 256.0f);
++src;
}
}

View File

@ -3262,6 +3262,52 @@ void MirrorUVRow_SSSE3(const uint8_t* src,
}
#endif // HAS_MIRRORUVROW_SSSE3
#ifdef HAS_RGB24MIRRORROW_SSSE3
// pshufb control masks that reverse RGB24 pixels inside one 16-byte register.
// An index with the high bit set (128) makes pshufb write zero to that byte.

// Shuffle first 5 pixels to last 5 mirrored. first byte zero
// (source bytes 0..14 -> result bytes 1..15, pixel order reversed).
static const uvec8 kShuffleMirrorRGB0 = {128u, 12u, 13u, 14u, 9u, 10u, 11u, 6u,
7u, 8u, 3u, 4u, 5u, 0u, 1u, 2u};
// Shuffle last 5 pixels to first 5 mirrored. last byte zero
// (source bytes 1..15 -> result bytes 0..14, pixel order reversed).
static const uvec8 kShuffleMirrorRGB1 = {13u, 14u, 15u, 10u, 11u, 12u, 7u,
8u, 9u, 4u, 5u, 6u, 1u, 2u, 3u, 128u};
// Mirror a row of RGB24 pixels with SSSE3.
// Shuffle 5 pixels at a time (15 bytes); each loop iteration handles 16
// pixels (48 bytes). src_rgb24 is first moved to the last 48 bytes of the row
// and steps backwards by 48, while dst_rgb24 steps forwards by 48.
// Three overlapping 16-byte loads at offsets 0, 15 and 30 each supply 5
// pixels; a fourth load at offset 32 supplies the 16th pixel.
// The stores are ordered so that the zero byte each shuffle leaves behind is
// overwritten by the store that follows it.
// The loop has no remainder handling, so width is expected to be a positive
// multiple of 16.
void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
int width) {
intptr_t temp_width = (intptr_t)(width);
// Point at the final 48-byte group of the source row.
src_rgb24 += width * 3 - 48;
asm volatile(
// xmm4 = kShuffleMirrorRGB0, xmm5 = kShuffleMirrorRGB1.
"movdqa %3,%%xmm4 \n"
"movdqa %4,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n" // first 5
"movdqu 15(%0),%%xmm1 \n" // next 5
"movdqu 30(%0),%%xmm2 \n" // next 5
"movdqu 32(%0),%%xmm3 \n" // last 1 special
"pshufb %%xmm4,%%xmm0 \n"
"pshufb %%xmm4,%%xmm1 \n"
"pshufb %%xmm4,%%xmm2 \n"
"pshufb %%xmm5,%%xmm3 \n"
// Step the source back one 48-byte group.
"lea -0x30(%0),%0 \n"
"movdqu %%xmm0,32(%1) \n" // last 5
"movdqu %%xmm1,17(%1) \n" // next 5
"movdqu %%xmm2,2(%1) \n" // next 5
// Only the low 8 bytes of xmm3 are stored; they fill dst bytes 0..7.
"movlpd %%xmm3,0(%1) \n" // first 1
"lea 0x30(%1),%1 \n"
// 16 pixels consumed per iteration.
"sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_rgb24), // %1
"+r"(temp_width) // %2
: "m"(kShuffleMirrorRGB0), // %3
"m"(kShuffleMirrorRGB1) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_RGB24MIRRORROW_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSE2
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {

View File

@ -6048,7 +6048,9 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" // yg
"ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" // bb
"ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" // ub
"or %[ub], %[ub], %[mask] \n\t"//must sign extension
"or %[ub], %[ub], %[mask] \n\t" // must
// sign
// extension
"ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" // bg
"ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" // ug
"punpcklbh %[ug], %[ug], %[zero] \n\t"
@ -6060,7 +6062,8 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
"ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" // vr
"punpcklbh %[vr], %[vr], %[zero] \n\t"
"pshufh %[vr], %[vr], %[five] \n\t"
"or %[vr], %[vr], %[mask] \n\t"//sign extension
"or %[vr], %[vr], %[mask] \n\t" // sign
// extension
"1: \n\t"
"gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
@ -6110,23 +6113,17 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
"daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y),
[u]"=&f"(u), [v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [alpha]"f"(-1),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask)
: "memory"
);
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
[five] "f"(0x55), [mask] "f"(mask)
: "memory");
}
// Also used for 420
@ -6145,7 +6142,9 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" // yg
"ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" // bb
"ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" // ub
"or %[ub], %[ub], %[mask] \n\t"//must sign extension
"or %[ub], %[ub], %[mask] \n\t" // must
// sign
// extension
"ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" // bg
"ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" // ug
"punpcklbh %[ug], %[ug], %[zero] \n\t"
@ -6157,7 +6156,8 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
"ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" // vr
"punpcklbh %[vr], %[vr], %[zero] \n\t"
"pshufh %[vr], %[vr], %[five] \n\t"
"or %[vr], %[vr], %[mask] \n\t"//sign extension
"or %[vr], %[vr], %[mask] \n\t" // sign
// extension
"1: \n\t"
"gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
@ -6212,23 +6212,17 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y),
[u]"=&f"(u), [v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [alpha]"f"(-1),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask)
: "memory"
);
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
[five] "f"(0x55), [mask] "f"(mask)
: "memory");
}
// 10 bit YUV to ARGB
@ -6316,24 +6310,18 @@ void I210ToARGBRow_MMI(const uint16_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y),
[u]"=&f"(u), [v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [alpha]"f"(-1),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask), [two]"f"(0x02),
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
[five] "f"(0x55), [mask] "f"(mask), [two] "f"(0x02),
[mask1] "f"(0x00ff00ff00ff00ff)
: "memory"
);
: "memory");
}
void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
@ -6422,23 +6410,17 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v), [a]"=&f"(a),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [a] "=&f"(a),
[b_vec0] "=&f"(b_vec[0]), [b_vec1] "=&f"(b_vec[1]),
[g_vec0] "=&f"(g_vec[0]), [g_vec1] "=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[a_ptr]"r"(src_a), [zero]"f"(0x00),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask)
: "memory"
);
[r_vec0] "=&f"(r_vec[0]), [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub),
[ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb),
[bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [a_ptr] "r"(src_a), [zero] "f"(0x00),
[six] "f"(0x6), [five] "f"(0x55), [mask] "f"(mask)
: "memory");
}
void I422ToRGB24Row_MMI(const uint8_t* src_y,
@ -6528,7 +6510,6 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
"gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t"
"gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
"daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
"daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
"daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
@ -6536,24 +6517,17 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask]"f"(mask),
[lmove1]"f"(0x18), [rmove1]"f"(0x8),
[one]"f"(0x1)
: "memory"
);
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask] "f"(mask), [lmove1] "f"(0x18), [rmove1] "f"(0x8), [one] "f"(0x1)
: "memory");
}
void I422ToARGB4444Row_MMI(const uint8_t* src_y,
@ -6651,23 +6625,16 @@ void I422ToARGB4444Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00),
[four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[dst_argb4444] "r"(dst_argb4444), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask] "f"(0xff00ff00ff00ff00), [four] "f"(0x4),
[mask1] "f"(0xf0f0f0f0f0f0f0f0), [alpha] "f"(-1)
: "memory");
}
void I422ToARGB1555Row_MMI(const uint8_t* src_y,
@ -6779,24 +6746,17 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[three]"f"(0x3), [mask2]"f"(0x1f0000001f),
[eight]"f"(0x8), [mask3]"f"(0x800000008000),
[lmove5]"f"(0x5)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[dst_argb1555] "r"(dst_argb1555), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3),
[mask2] "f"(0x1f0000001f), [eight] "f"(0x8),
[mask3] "f"(0x800000008000), [lmove5] "f"(0x5)
: "memory");
}
void I422ToRGB565Row_MMI(const uint8_t* src_y,
@ -6910,24 +6870,17 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[three]"f"(0x3), [mask2]"f"(0x1f0000001f),
[eight]"f"(0x8), [seven]"f"(0x7),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[dst_rgb565] "r"(dst_rgb565), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3),
[mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7),
[lmove5] "f"(0x5)
: "memory"
);
: "memory");
}
void NV12ToARGBRow_MMI(const uint8_t* src_y,
@ -7005,23 +6958,15 @@ void NV12ToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1)
: "memory");
}
void NV21ToARGBRow_MMI(const uint8_t* src_y,
@ -7099,23 +7044,15 @@ void NV21ToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1)
: "memory");
}
void NV12ToRGB24Row_MMI(const uint8_t* src_y,
@ -7204,24 +7141,16 @@ void NV12ToRGB24Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1), [lmove1]"f"(0x18),
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [lmove1] "f"(0x18),
[one] "f"(0x1), [rmove1] "f"(0x8)
: "memory"
);
: "memory");
}
void NV21ToRGB24Row_MMI(const uint8_t* src_y,
@ -7310,24 +7239,16 @@ void NV21ToRGB24Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[lmove1]"f"(0x18), [rmove1]"f"(0x8),
[one]"f"(0x1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [lmove1] "f"(0x18),
[rmove1] "f"(0x8), [one] "f"(0x1)
: "memory");
}
void NV12ToRGB565Row_MMI(const uint8_t* src_y,
@ -7436,24 +7357,16 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
[dst_rgb565]"r"(dst_rgb565),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[three]"f"(0x3), [mask2]"f"(0x1f0000001f),
[eight]"f"(0x8), [seven]"f"(0x7)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [dst_rgb565] "r"(dst_rgb565),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [three] "f"(0x3),
[mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7)
: "memory");
}
void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
@ -7530,22 +7443,15 @@ void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [yuy2_ptr] "r"(src_yuy2), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1), [eight]"f"(0x8)
: "memory"
);
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8)
: "memory");
}
void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
@ -7622,22 +7528,15 @@ void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [uyvy_ptr] "r"(src_uyvy), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1), [eight]"f"(0x8)
: "memory"
);
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8)
: "memory");
}
void I422ToRGBARow_MMI(const uint8_t* src_y,
@ -7720,22 +7619,15 @@ void I422ToRGBARow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask1] "f"(0xff00ff00ff00ff00), [alpha] "f"(-1)
: "memory");
}
void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
@ -7752,8 +7644,7 @@ void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
"bnez %[width], 1b \n\t"
: [v32] "+&f"(v32)
: [dst_ptr] "r"(dst_argb), [width] "r"(width)
: "memory"
);
: "memory");
}
// 10 bit YUV to ARGB

View File

@ -727,14 +727,10 @@ void MirrorUVRow_NEON(const uint8_t* src_uv,
}
void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
src += width * 4 - 16;
asm volatile(
// Start at end of source row.
"mov r3, #-16 \n"
"add %0, %0, %2, lsl #2 \n"
"sub %0, #16 \n"
"1: \n"
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
"vld1.8 {q0}, [%0], %3 \n" // src -= 16
"subs %2, #4 \n" // 4 pixels per loop.
"vrev64.32 q0, q0 \n"
"vst1.8 {d1}, [%1]! \n" // dst += 16
@ -743,12 +739,30 @@ void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
:
: "cc", "memory", "r3", "q0");
: "r"(-16) // %3
: "cc", "memory", "q0");
}
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
uint8_t* dst_argb,
// Horizontally mirrors one row of RGB24 pixels using 32-bit ARM NEON,
// handling 8 pixels (24 bytes) per loop iteration.
//   src_rgb24: start of the source row; it is read from its end backwards.
//   dst_rgb24: start of the destination row; it is written front to back.
//   width:     pixel count. Each pass subtracts 8 and the loop repeats while
//              the remaining count is greater than zero.
// NOTE(review): presumably callers pass a width that is a multiple of 8 and
// handle any remainder elsewhere - confirm against the callers.
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
int width) {
// Point at the last 24 bytes (8 pixels of 3 bytes each) of the source row.
src_rgb24 += width * 3 - 24;
asm volatile(
"1: \n"
// Structure load: the three bytes of each pixel are split across d0, d1 and
// d2 (one channel per register), then the pointer steps back by %3 (-24).
"vld3.8 {d0, d1, d2}, [%0], %3 \n" // src -= 24
"subs %2, #8 \n" // 8 pixels per loop.
// Reverse the 8 bytes of each channel register, i.e. reverse pixel order.
"vrev64.8 d0, d0 \n"
"vrev64.8 d1, d1 \n"
"vrev64.8 d2, d2 \n"
// Structure store: re-interleaves the three channels back into 3-byte pixels.
"vst3.8 {d0, d1, d2}, [%1]! \n" // dst += 24
// Branches on the flags set by the subs above; the NEON instructions in
// between do not modify them.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_rgb24), // %1
"+r"(width) // %2
: "r"(-24) // %3
: "cc", "memory", "d0", "d1", "d2");
}
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb,
int width) {
asm volatile(
"vmov.u8 d4, #255 \n" // Alpha
@ -2065,7 +2079,6 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
}
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d4, #29 \n" // B * 0.1140 coefficient

View File

@ -278,7 +278,8 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
v20) "subs %w4, %w4, #8 \n" ARGBTORGB565
"st1 {v0.8h}, [%3], #16 \n" // store 8 pixels
// RGB565.
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"prfm pldl1keep, [%0, 448] \n" // prefetch 7
// lines ahead
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
@ -316,7 +317,8 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
v20) "subs %w4, %w4, #8 \n" ARGBTOARGB1555
"st1 {v0.8h}, [%3], #16 \n" // store 8 pixels
// ARGB1555.
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"prfm pldl1keep, [%0, 448] \n" // prefetch 7
// lines ahead
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
@ -530,7 +532,8 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y,
v20) "subs %w3, %w3, #8 \n" ARGBTORGB565
"st1 {v0.8h}, [%2], 16 \n" // store 8 pixels
// RGB565.
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"prfm pldl1keep, [%0, 448] \n" // prefetch 7
// lines ahead
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
@ -750,7 +753,6 @@ void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
"tbl v1.16b, {v1.16b}, v3.16b \n"
"tbl v0.16b, {v2.16b}, v3.16b \n"
"st1 {v0.16b, v1.16b}, [%1], #32 \n" // store 32 pixels
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
@ -775,7 +777,6 @@ void MirrorUVRow_NEON(const uint8_t* src_uv,
"rev64 v1.8b, v1.8b \n"
"st1 {v0.8b}, [%1], #8 \n" // dst += 8
"st1 {v1.8b}, [%2], #8 \n"
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"b.gt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
@ -785,24 +786,49 @@ void MirrorUVRow_NEON(const uint8_t* src_uv,
: "cc", "memory", "v0", "v1");
}
void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb,
int width) {
asm volatile(
// Start at end of source row.
"add %0, %0, %w2, sxtw #2 \n"
"sub %0, %0, #16 \n"
"ld1 {v4.16b}, [%4] \n" // shuffler
"add %0, %0, %w2, sxtw #2 \n" // Start at end of row.
"sub %0, %0, #64 \n"
"1: \n"
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
"subs %w2, %w2, #4 \n" // 4 pixels per loop.
"rev64 v0.4s, v0.4s \n"
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
"st1 {v0.D}[0], [%1], #8 \n"
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0], %3\n" // src -= 64
"subs %w2, %w2, #16 \n" // 16 pixels per loop.
"tbl v0.16b, {v0.16b}, v4.16b \n"
"tbl v1.16b, {v1.16b}, v4.16b \n"
"tbl v2.16b, {v2.16b}, v4.16b \n"
"tbl v3.16b, {v3.16b}, v4.16b \n"
"st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1], #64 \n" // dst += 64
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0");
: "r"((ptrdiff_t)-64), // %3
"r"(&kShuffleMirror) // %4
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
}
// Horizontally mirrors one row of RGB24 pixels using AArch64 NEON,
// handling 16 pixels (48 bytes) per loop iteration.
//   src_rgb24: start of the source row; it is read from its end backwards.
//   dst_rgb24: start of the destination row; it is written front to back.
//   width:     pixel count. Each pass subtracts 16 and the loop repeats while
//              the remaining count is greater than zero.
// NOTE(review): presumably callers pass a width that is a multiple of 16 and
// handle any remainder elsewhere - confirm against the callers.
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
int width) {
// Point at the last 48 bytes (16 pixels of 3 bytes each) of the source row.
src_rgb24 += width * 3 - 48;
asm volatile(
// Load the 16-byte table used as the tbl index vector below.
// NOTE(review): kShuffleMirror is defined outside this block; presumably it
// holds byte indices in reversed order - confirm.
"ld1 {v3.16b}, [%4] \n" // shuffler
"1: \n"
// Structure load: the three bytes of each pixel are split across v0, v1 and
// v2 (one channel per register), then the pointer steps back by %3 (-48).
"ld3 {v0.16b, v1.16b, v2.16b}, [%0], %3\n" // src -= 48
"subs %w2, %w2, #16 \n" // 16 pixels per loop.
// Permute the 16 bytes of each channel register through the table in v3.
"tbl v0.16b, {v0.16b}, v3.16b \n"
"tbl v1.16b, {v1.16b}, v3.16b \n"
"tbl v2.16b, {v2.16b}, v3.16b \n"
// Structure store: re-interleaves the three channels back into 3-byte pixels.
"st3 {v0.16b, v1.16b, v2.16b}, [%1], #48 \n" // dst += 48
// Branches on the flags set by the subs above; the NEON instructions in
// between do not modify them.
"b.gt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_rgb24), // %1
"+r"(width) // %2
: "r"((ptrdiff_t)-48), // %3
"r"(&kShuffleMirror) // %4
: "cc", "memory", "v0", "v1", "v2", "v3");
}
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
@ -2185,7 +2211,8 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
"umull v0.8h, v0.8b, v4.8b \n" // B
"umlal v0.8h, v1.8b, v5.8b \n" // G
"umlal v0.8h, v2.8b, v6.8b \n" // R
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 cache lines ahead
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 cache lines
// ahead
"uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
@ -3089,9 +3116,7 @@ void GaussCol_F32_NEON(const float* src0,
}
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussRow_F32_NEON(const float* src,
float* dst,
int width) {
void GaussRow_F32_NEON(const float* src, float* dst, int width) {
asm volatile(
"ld3r {v6.4s, v7.4s, v8.4s}, [%3] \n" // constants 4, 6, 1/256

View File

@ -1245,6 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
TESTATOB(RGB24, 3, 3, 1, RGB24Mirror, 3, 3, 1, 0)
TESTATOB(RAW, 3, 3, 1, J400, 1, 1, 1, 0)
#ifdef INTEL_TEST
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)

View File

@ -574,13 +574,15 @@ TEST_FACTOR(3, 1, 3, 0)
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
TEST_F(LibYUVScaleTest, \
DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
TEST_F(LibYUVScaleTest, \
DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \