mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Neon RGB24 to I420
BUG=none TEST=convert_test Review URL: https://webrtc-codereview.appspot.com/965018 git-svn-id: http://libyuv.googlecode.com/svn/trunk@481 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
522d757c92
commit
9573071950
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 480
|
||||
Version: 481
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -362,6 +362,16 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
@ -433,6 +443,16 @@ void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
|
||||
@ -449,6 +469,10 @@ void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 480
|
||||
#define LIBYUV_VERSION 481
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1012,6 +1012,12 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
BGRAToYRow = BGRAToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
BGRAToUVRow = BGRAToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
BGRAToUVRow = BGRAToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1074,6 +1080,12 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ABGRToYRow = ABGRToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToUVRow = ABGRToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1136,6 +1148,12 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGBAToYRow = RGBAToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
RGBAToUVRow = RGBAToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGBAToUVRow = RGBAToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1173,6 +1191,25 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
|
||||
src_stride_rgb24 = -src_stride_rgb24;
|
||||
}
|
||||
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
|
||||
void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
|
||||
RGB24ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB24ToYRow = RGB24ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToYRow = RGB24ToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToUVRow = RGB24ToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else // HAS_RGB24TOYROW_NEON
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
|
||||
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
RGB24ToARGBRow_C;
|
||||
@ -1183,15 +1220,7 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RGB24TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1202,23 +1231,6 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
void (*RGB24ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
RGB24ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB24ToYRow = RGB24ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToYRow = RGB24ToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1235,13 +1247,14 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
#endif // HAS_RGB24TOYROW_NEON
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
|
||||
RGB24ToYRow(src_rgb24, dst_y, width);
|
||||
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
@ -1251,11 +1264,12 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
RGB24ToARGBRow_C(src_rgb24, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
#if defined(HAS_RGB24TOYROW_NEON)
|
||||
RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
|
||||
RGB24ToYRow(src_rgb24, dst_y, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
@ -1263,7 +1277,6 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
}
|
||||
|
||||
// Convert RAW to I420.
|
||||
// Same as RGB24 but RGB vs BGR
|
||||
LIBYUV_API
|
||||
int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
@ -1281,6 +1294,25 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
src_raw = src_raw + (height - 1) * src_stride_raw;
|
||||
src_stride_raw = -src_stride_raw;
|
||||
}
|
||||
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
|
||||
void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
|
||||
RAWToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RAWToYRow = RAWToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToYRow = RAWToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
RAWToUVRow = RAWToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToUVRow = RAWToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else // HAS_RAWTOYROW_NEON
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
|
||||
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
RAWToARGBRow_C;
|
||||
@ -1291,15 +1323,7 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
RAWToARGBRow = RAWToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RAWTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToARGBRow = RAWToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1310,23 +1334,6 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
void (*RAWToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
RAWToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RAWToYRow = RAWToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToYRow = RAWToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1343,13 +1350,14 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
#endif // HAS_RAWTOYROW_NEON
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
RAWToARGBRow(src_raw + src_stride_raw, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
|
||||
RAWToYRow(src_raw, dst_y, width);
|
||||
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
RAWToARGBRow(src_raw + src_stride_raw, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
@ -1359,11 +1367,12 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
RAWToARGBRow_C(src_raw, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
#if defined(HAS_RAWTOYROW_NEON)
|
||||
RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
|
||||
RAWToYRow(src_raw, dst_y, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
@ -1550,10 +1559,12 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, width);
|
||||
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
|
||||
width);
|
||||
#else
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride, width);
|
||||
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride,
|
||||
width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
|
||||
|
||||
@ -543,8 +543,8 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
|
||||
src_stride_argb1555 = -src_stride_argb1555;
|
||||
}
|
||||
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, int pix) =
|
||||
ARGB1555ToARGBRow_C;
|
||||
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
|
||||
int pix) = ARGB1555ToARGBRow_C;
|
||||
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
@ -585,8 +585,8 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
|
||||
src_stride_argb4444 = -src_stride_argb4444;
|
||||
}
|
||||
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, int pix) =
|
||||
ARGB4444ToARGBRow_C;
|
||||
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
|
||||
int pix) = ARGB4444ToARGBRow_C;
|
||||
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
|
||||
@ -95,8 +95,7 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
|
||||
}
|
||||
}
|
||||
@ -239,6 +238,9 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
@ -345,6 +347,9 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
@ -425,20 +430,27 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
|
||||
dst_stride_yuy2 = -dst_stride_yuy2;
|
||||
}
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix) = ARGBToUV422Row_C;
|
||||
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
}
|
||||
}
|
||||
@ -448,8 +460,11 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -479,7 +494,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
|
||||
ARGBToUV422Row(src_argb, row_u, row_v, width);
|
||||
ARGBToYRow(src_argb, row_y, width);
|
||||
I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
|
||||
src_argb += src_stride_argb;
|
||||
@ -504,20 +519,27 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
|
||||
dst_stride_uyvy = -dst_stride_uyvy;
|
||||
}
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix) = ARGBToUV422Row_C;
|
||||
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
}
|
||||
}
|
||||
@ -527,8 +549,11 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -558,7 +583,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
|
||||
ARGBToUV422Row(src_argb, row_u, row_v, width);
|
||||
ARGBToYRow(src_argb, row_y, width);
|
||||
I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
|
||||
src_argb += src_stride_argb;
|
||||
|
||||
@ -315,8 +315,8 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
}
|
||||
@ -327,6 +327,9 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
|
||||
@ -248,6 +248,11 @@ UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVROW_NEON
|
||||
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4)
|
||||
UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4)
|
||||
UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4)
|
||||
UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4)
|
||||
UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3)
|
||||
UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3)
|
||||
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2)
|
||||
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2)
|
||||
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2)
|
||||
|
||||
@ -109,7 +109,8 @@ void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, int width) {
|
||||
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
|
||||
int width) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
uint8 b = src_argb1555[0] & 0x1f;
|
||||
uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
|
||||
@ -124,7 +125,8 @@ void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, int width)
|
||||
}
|
||||
}
|
||||
|
||||
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, int width) {
|
||||
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
|
||||
int width) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
uint8 b = src_argb4444[0] & 0x0f;
|
||||
uint8 g = src_argb4444[0] >> 4;
|
||||
|
||||
@ -1729,14 +1729,14 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
|
||||
#ifdef HAS_ARGBTOUVROW_NEON
|
||||
|
||||
#define RGBTOUV \
|
||||
"vmul.s16 q8, q0, q10 \n" /* B */ \
|
||||
"vmls.s16 q8, q1, q11 \n" /* G */ \
|
||||
"vmls.s16 q8, q2, q12 \n" /* R */ \
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"vmul.s16 q8, " #QB ", q10 \n" /* B */ \
|
||||
"vmls.s16 q8, " #QG ", q11 \n" /* G */ \
|
||||
"vmls.s16 q8, " #QR ", q12 \n" /* R */ \
|
||||
"vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
|
||||
"vmul.s16 q9, q2, q10 \n" /* R */ \
|
||||
"vmls.s16 q9, q1, q14 \n" /* G */ \
|
||||
"vmls.s16 q9, q0, q13 \n" /* B */ \
|
||||
"vmul.s16 q9, " #QR ", q10 \n" /* R */ \
|
||||
"vmls.s16 q9, " #QG ", q14 \n" /* G */ \
|
||||
"vmls.s16 q9, " #QB ", q13 \n" /* B */ \
|
||||
"vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
|
||||
"vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
|
||||
"vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
|
||||
@ -1764,7 +1764,7 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
"subs %4, %4, #16 \n" // 32 processed per loop.
|
||||
RGBTOUV
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
@ -1778,6 +1778,197 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two adjacent rows of BGRA.
// src_bgra is the first row; the second row is read from
// src_bgra + src_stride_bgra.
// Each loop iteration reads 16 pixels from each row, sums every 2x2 group per
// channel, converts the sums with the RGBTOUV macro and writes 8 bytes to
// dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration and the loop
// repeats while the remainder is greater than zero.
// The dispatch code visible in this change selects this function only when
// width is a multiple of 16.
// NOTE(review): src_stride_bgra is bound to a general register and, after the
// initial add, used as the second row address; this relies on an int register
// being able to hold a pointer - confirm for every target this is built for.
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src_bgra + src_stride_bgra (second row)
|
||||
// Coefficients are pre-divided by 4; presumably this folds the averaging of
// the four summed samples into the multiply - TODO confirm.
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point, added by RGBTOUV
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// vld4.8 de-interleaves: q0 = byte 0, q1 = byte 1, q2 = byte 2, q3 = byte 3
// of each pixel. Byte 0 (q0) is not used below.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
|
||||
"vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts (adjacent pairs added).
|
||||
"vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
|
||||
// Second row goes to q4..q7 with the same byte layout as q0..q3.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
|
||||
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
|
||||
"vpadal.u8 q3, q7 \n" // B: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q2, q6 \n" // G: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q1, q5 \n" // R: add second row pairs into the first row sums.
|
||||
"subs %4, %4, #16 \n" // 16 pixels x 2 rows = 32 processed per loop.
|
||||
// Arguments are (B, G, R); the macro leaves U in d0 and V in d1.
RGBTOUV(q3, q2, q1)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n" // flags come from the subs above; vector ops leave them alone.
|
||||
: "+r"(src_bgra), // %0
|
||||
"+r"(src_stride_bgra), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two adjacent rows of ABGR.
// src_abgr is the first row; the second row is read from
// src_abgr + src_stride_abgr.
// Each loop iteration reads 16 pixels from each row, sums every 2x2 group per
// channel, converts the sums with the RGBTOUV macro and writes 8 bytes to
// dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration and the loop
// repeats while the remainder is greater than zero.
// The dispatch code visible in this change selects this function only when
// width is a multiple of 16.
// NOTE(review): src_stride_abgr is bound to a general register and, after the
// initial add, used as the second row address; this relies on an int register
// being able to hold a pointer - confirm for every target this is built for.
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src_abgr + src_stride_abgr (second row)
|
||||
// Coefficients are pre-divided by 4; presumably this folds the averaging of
// the four summed samples into the multiply - TODO confirm.
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point, added by RGBTOUV
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// vld4.8 de-interleaves: q0 = byte 0, q1 = byte 1, q2 = byte 2, q3 = byte 3
// of each pixel. Byte 3 (q3) is not used below.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
|
||||
"vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts (adjacent pairs added).
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
|
||||
// Second row goes to q4..q7 with the same byte layout as q0..q3.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
|
||||
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
|
||||
"vpadal.u8 q2, q6 \n" // B: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q1, q5 \n" // G: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q0, q4 \n" // R: add second row pairs into the first row sums.
|
||||
"subs %4, %4, #16 \n" // 16 pixels x 2 rows = 32 processed per loop.
|
||||
// Arguments are (B, G, R); the macro leaves U in d0 and V in d1.
RGBTOUV(q2, q1, q0)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n" // flags come from the subs above; vector ops leave them alone.
|
||||
: "+r"(src_abgr), // %0
|
||||
"+r"(src_stride_abgr), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two adjacent rows of RGBA.
// src_rgba is the first row; the second row is read from
// src_rgba + src_stride_rgba.
// Each loop iteration reads 16 pixels from each row, sums every 2x2 group per
// channel, converts the sums with the RGBTOUV macro and writes 8 bytes to
// dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration and the loop
// repeats while the remainder is greater than zero.
// The dispatch code visible in this change selects this function only when
// width is a multiple of 16.
// NOTE(review): src_stride_rgba is bound to a general register and, after the
// initial add, used as the second row address; this relies on an int register
// being able to hold a pointer - confirm for every target this is built for.
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src_rgba + src_stride_rgba (second row)
|
||||
// Coefficients are pre-divided by 4; presumably this folds the averaging of
// the four summed samples into the multiply - TODO confirm.
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point, added by RGBTOUV
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// vld4.8 de-interleaves: q0 = byte 0, q1 = byte 1, q2 = byte 2, q3 = byte 3
// of each pixel. Byte 0 is not used: q0 is overwritten by the first vpaddl.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
|
||||
// Each widening add writes one register lower than its source, so the
// sums end up as q0 = B, q1 = G, q2 = R. The order matters: every source
// register is read before a later instruction overwrites it.
"vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts.
|
||||
// Second row goes to q4..q7 with the same byte layout as the first load.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
|
||||
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
|
||||
"vpadal.u8 q0, q5 \n" // B: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q1, q6 \n" // G: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q2, q7 \n" // R: add second row pairs into the first row sums.
|
||||
"subs %4, %4, #16 \n" // 16 pixels x 2 rows = 32 processed per loop.
|
||||
// Arguments are (B, G, R); the macro leaves U in d0 and V in d1.
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n" // flags come from the subs above; vector ops leave them alone.
|
||||
: "+r"(src_rgba), // %0
|
||||
"+r"(src_stride_rgba), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two adjacent rows of RGB24
// (3 bytes per pixel). src_rgb24 is the first row; the second row is read
// from src_rgb24 + src_stride_rgb24.
// Each loop iteration reads 16 pixels from each row, sums every 2x2 group per
// channel, converts the sums with the RGBTOUV macro and writes 8 bytes to
// dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration and the loop
// repeats while the remainder is greater than zero.
// The dispatch code visible in this change selects this function only when
// width is a multiple of 16.
// NOTE(review): src_stride_rgb24 is bound to a general register and, after
// the initial add, used as the second row address; this relies on an int
// register being able to hold a pointer - confirm for every target this is
// built for.
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src_rgb24 + src_stride_rgb24 (second row)
|
||||
// Coefficients are pre-divided by 4; presumably this folds the averaging of
// the four summed samples into the multiply - TODO confirm.
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point, added by RGBTOUV
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// vld3.8 de-interleaves: q0 = byte 0, q1 = byte 1, q2 = byte 2 of each
// pixel, used below as B, G, R respectively.
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
|
||||
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
|
||||
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts (adjacent pairs added).
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
|
||||
// Second row goes to q4..q6 with the same byte layout as q0..q2.
"vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
|
||||
"vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
|
||||
"vpadal.u8 q0, q4 \n" // B: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q1, q5 \n" // G: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q2, q6 \n" // R: add second row pairs into the first row sums.
|
||||
"subs %4, %4, #16 \n" // 16 pixels x 2 rows = 32 processed per loop.
|
||||
// Arguments are (B, G, R); the macro leaves U in d0 and V in d1.
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n" // flags come from the subs above; vector ops leave them alone.
|
||||
: "+r"(src_rgb24), // %0
|
||||
"+r"(src_stride_rgb24), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two adjacent rows of RAW
// (3 bytes per pixel, channels in the opposite order to RGB24).
// src_raw is the first row; the second row is read from
// src_raw + src_stride_raw.
// Each loop iteration reads 16 pixels from each row, sums every 2x2 group per
// channel, converts the sums with the RGBTOUV macro and writes 8 bytes to
// dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration and the loop
// repeats while the remainder is greater than zero.
// The dispatch code visible in this change selects this function only when
// width is a multiple of 16.
// NOTE(review): src_stride_raw is bound to a general register and, after the
// initial add, used as the second row address; this relies on an int register
// being able to hold a pointer - confirm for every target this is built for.
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src_raw + src_stride_raw (second row)
|
||||
// Coefficients are pre-divided by 4; presumably this folds the averaging of
// the four summed samples into the multiply - TODO confirm.
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point, added by RGBTOUV
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// vld3.8 de-interleaves: q0 = byte 0, q1 = byte 1, q2 = byte 2 of each
// pixel, used below as R, G, B respectively.
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
|
||||
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
|
||||
"vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts (adjacent pairs added).
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
|
||||
// Second row goes to q4..q6 with the same byte layout as q0..q2.
"vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
|
||||
"vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
|
||||
"vpadal.u8 q2, q6 \n" // B: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q1, q5 \n" // G: add second row pairs into the first row sums.
|
||||
"vpadal.u8 q0, q4 \n" // R: add second row pairs into the first row sums.
|
||||
"subs %4, %4, #16 \n" // 16 pixels x 2 rows = 32 processed per loop.
|
||||
// Arguments are (B, G, R); the macro leaves U in d0 and V in d1.
RGBTOUV(q2, q1, q0)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n" // flags come from the subs above; vector ops leave them alone.
|
||||
: "+r"(src_raw), // %0
|
||||
"+r"(src_stride_raw), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
#endif // HAS_ARGBTOUVROW_NEON
|
||||
|
||||
// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user