mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Neon RGB To I420
BUG=none TEST=convert_test Review URL: https://webrtc-codereview.appspot.com/936015 git-svn-id: http://libyuv.googlecode.com/svn/trunk@479 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
dd2d512e5a
commit
f1daa3db65
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 478
|
||||
Version: 479
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -197,6 +197,7 @@ extern "C" {
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_RGB565TOUVROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_ABGRTOYROW_NEON
|
||||
#define HAS_RGBATOYROW_NEON
|
||||
@ -354,6 +355,8 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
|
||||
void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
|
||||
void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
|
||||
@ -402,7 +405,7 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
|
||||
void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
|
||||
@ -411,6 +414,10 @@ void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
|
||||
@ -419,6 +426,8 @@ void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 478
|
||||
#define LIBYUV_VERSION 479
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1379,6 +1379,25 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
|
||||
src_stride_rgb565 = -src_stride_rgb565;
|
||||
}
|
||||
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
|
||||
void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
|
||||
RGB565ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB565ToYRow = RGB565ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToYRow = RGB565ToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB565ToUVRow = RGB565ToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else // HAS_RGB565TOYROW_NEON
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
|
||||
void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
RGB565ToARGBRow_C;
|
||||
@ -1389,15 +1408,7 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RGB565TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1408,20 +1419,6 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
void (*RGB565ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
RGB565ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB565ToYRow = RGB565ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToYRow = RGB565ToYRow_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1438,13 +1435,14 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
#endif // HAS_RGB565TOYROW_NEON
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
@ -1454,11 +1452,12 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
RGB565ToARGBRow_C(src_rgb565, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
#if defined(HAS_RGB565TOYROW_NEON)
|
||||
RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
#else
|
||||
RGB565ToARGBRow(src_rgb565, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -782,9 +782,9 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
|
||||
YUY2ToARGBRow_C;
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { // posix is 16, win is 8.
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
@ -826,9 +826,9 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
|
||||
UYVYToARGBRow_C;
|
||||
#if defined(HAS_UYVYTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { // posix is 16, win is 8.
|
||||
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
|
||||
@ -143,9 +143,9 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
|
||||
RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
|
||||
7, 1, 4)
|
||||
RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
|
||||
7, 2, 4)
|
||||
15, 2, 4)
|
||||
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
|
||||
7, 2, 4)
|
||||
15, 2, 4)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_NEON)
|
||||
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
|
||||
@ -224,12 +224,12 @@ UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
|
||||
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
|
||||
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
|
||||
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUVROW_SSE2
|
||||
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
|
||||
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUVROW_NEON
|
||||
#ifdef HAS_ARGBTOUVROW_NEON
|
||||
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4)
|
||||
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2)
|
||||
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2)
|
||||
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
|
||||
#endif
|
||||
|
||||
@ -349,6 +349,52 @@ void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
uint8 b0 = src_rgb565[0] & 0x1f;
|
||||
uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
|
||||
uint8 r0 = src_rgb565[1] >> 3;
|
||||
uint8 b1 = src_rgb565[2] & 0x1f;
|
||||
uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
|
||||
uint8 r1 = src_rgb565[3] >> 3;
|
||||
uint8 b2 = next_rgb565[0] & 0x1f;
|
||||
uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
|
||||
uint8 r2 = next_rgb565[1] >> 3;
|
||||
uint8 b3 = next_rgb565[2] & 0x1f;
|
||||
uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
|
||||
uint8 r3 = next_rgb565[3] >> 3;
|
||||
uint8 ab = (b0 + b1 + b2 + b3);
|
||||
uint8 ag = (g0 + g1 + g2 + g3);
|
||||
uint8 ar = (r0 + r1 + r2 + r3);
|
||||
ab = (ab << 1) | (ab >> 6);
|
||||
ar = (ar << 1) | (ar >> 6);
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
src_rgb565 += 4;
|
||||
next_rgb565 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
uint8 b0 = src_rgb565[0] & 0x1f;
|
||||
uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
|
||||
uint8 r0 = src_rgb565[1] >> 3;
|
||||
uint8 b2 = next_rgb565[0] & 0x1f;
|
||||
uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
|
||||
uint8 r2 = next_rgb565[1] >> 3;
|
||||
uint8 ab = (b0 + b2);
|
||||
uint8 ag = (g0 + g2);
|
||||
uint8 ar = (r0 + r2);
|
||||
ab = (ab << 2) | (ab >> 4);
|
||||
ag = (ag << 1) | (ag >> 6);
|
||||
ar = (ar << 2) | (ar >> 4);
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToUV444Row_C(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
|
||||
@ -1114,22 +1114,20 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
|
||||
|
||||
#ifdef HAS_RGB565TOARGBROW_NEON
|
||||
#define RGB565TOARGB \
|
||||
"vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
|
||||
"vshrn.u16 d5, q0, #5 \n" /* G xxGGGGGG */ \
|
||||
"vshrn.u16 d6, q0, #8 \n" /* R RRRRRxxx */ \
|
||||
"vshl.u8 d0, d4, #3 \n" /* B BBBBB000 upper 5 */ \
|
||||
"vshl.u8 d1, d5, #2 \n" /* G GGGGGG00 upper 6 */ \
|
||||
"vbic.u8 d2, d6, d7 \n" /* R RRRRR000 upper 5 */ \
|
||||
"vshr.u8 d4, d0, #5 \n" /* B 00000BBB lower 3 */ \
|
||||
"vshr.u8 d5, d1, #6 \n" /* G 000000GG lower 2 */ \
|
||||
"vshr.u8 d6, d2, #5 \n" /* R 00000RRR lower 3 */ \
|
||||
"vorr.u8 q0, q0, q2 \n" /* B,G */ \
|
||||
"vorr.u8 d2, d2, d6 \n" /* R */
|
||||
"vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
|
||||
"vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
|
||||
"vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
|
||||
"vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
|
||||
"vorr.u8 d0, d0, d4 \n" /* B */ \
|
||||
"vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
|
||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||
|
||||
void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // Alpha
|
||||
"vmov.u8 d7, #7 \n" // 5 bit mask
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
|
||||
@ -1207,7 +1205,7 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(pix) // %2
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
|
||||
: "memory", "cc", "q0", "q1", "q2" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGB4444TOARGBROW_NEON
|
||||
@ -1765,10 +1763,72 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
#endif // HAS_ARGBTOUVROW_NEON
|
||||
|
||||
|
||||
// 16x2 pixels -> 8x1. pix is number of RGB565 pixels per row. e.g. 16.
// Each loop iteration reads 16 RGB565 pixels from each of two rows, expands
// them with RGB565TOARGB, sums every 2x2 block per channel (vpaddl on the
// first row, vpadal on the second row) and stores 8 U bytes and 8 V bytes.
// The coefficients are written as "value / 4", presumably to compensate for
// the 4 sample sum - TODO confirm.
// The counter is decremented by 16 per iteration and the loop repeats while
// it remains positive.
// NOTE(review): pix is presumably expected to be a positive multiple of 16 -
// confirm against callers.
|
||||
#ifdef HAS_RGB565TOUVROW_NEON
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_rgb565 + src_stride_rgb565: second row pointer
|
||||
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
|
||||
RGB565TOARGB
|
||||
"vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
"vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels.
|
||||
RGB565TOARGB
|
||||
"vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels.
|
||||
RGB565TOARGB
|
||||
"vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
"vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels.
|
||||
RGB565TOARGB
|
||||
"vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"subs %4, %4, #16 \n" // 16 processed per loop.
|
||||
"vmul.s16 q8, q4, q10 \n" // B
|
||||
"vmls.s16 q8, q5, q11 \n" // G
|
||||
"vmls.s16 q8, q6, q12 \n" // R
|
||||
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
|
||||
"vmul.s16 q9, q6, q10 \n" // R
|
||||
"vmls.s16 q9, q5, q14 \n" // G
|
||||
"vmls.s16 q9, q4, q13 \n" // B
|
||||
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
|
||||
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
|
||||
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_rgb565), // %0
|
||||
"+r"(src_stride_rgb565), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#endif // HAS_RGB565TOUVROW_NEON
|
||||
|
||||
#ifdef HAS_RGB565TOYROW_NEON
|
||||
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d7, #7 \n" // 5 bit mask
|
||||
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
|
||||
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
|
||||
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
|
||||
|
||||
@ -611,7 +611,7 @@ TESTATOPLANAR(ABGR, 4, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGBA, 4, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RAW, 3, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB565, 2, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB565, 2, I420, 2, 2, 5)
|
||||
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(ARGB, 4, I411, 4, 1, 4)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user