mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Neon optimized ARGBToI444/422/411/420 Any variations, ARGB1555ToI420 Neon, ARGB4444ToI420
BUG=148 TEST=sudo LIBYUV_REPEAT=1000 nice --5 ./libyuv_unittest --gtest_filter=*R*ToI4* | sed 's/\(.*(\)\([0-9]*\)\( ms)\)/\2 - \1\2\3/g' | sort -rn | grep ms Review URL: https://webrtc-codereview.appspot.com/936020 git-svn-id: http://libyuv.googlecode.com/svn/trunk@480 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f1daa3db65
commit
522d757c92
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 479
|
||||
Version: 480
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -48,8 +48,8 @@ extern "C" {
|
||||
#define HAS_ARGBTORGB24ROW_SSSE3
|
||||
#define HAS_ARGBTORGB565ROW_SSE2
|
||||
#define HAS_ARGBTORGBAROW_SSSE3
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_ARGBTOUV422ROW_SSSE3
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_ARGBTOYROW_SSSE3
|
||||
#define HAS_BGRATOARGBROW_SSSE3
|
||||
#define HAS_BGRATOUVROW_SSSE3
|
||||
@ -60,43 +60,43 @@ extern "C" {
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOBGRAROW_SSSE3
|
||||
#define HAS_I422TORAWROW_SSSE3
|
||||
#define HAS_I422TORGB24ROW_SSSE3
|
||||
#define HAS_I422TORGB565ROW_SSSE3
|
||||
#define HAS_I422TORGBAROW_SSSE3
|
||||
#define HAS_I422TOUYVYROW_SSE2
|
||||
#define HAS_I422TOYUY2ROW_SSE2
|
||||
#define HAS_I444TOARGBROW_SSSE3
|
||||
#define HAS_MERGEUV_SSE2
|
||||
#define HAS_MIRRORROW_SSSE3
|
||||
#define HAS_MirrorUVRow_SSSE3
|
||||
#define HAS_NV12TOARGBROW_SSSE3
|
||||
#define HAS_NV21TOARGBROW_SSSE3
|
||||
#define HAS_NV12TORGB565ROW_SSSE3
|
||||
#define HAS_NV21TOARGBROW_SSSE3
|
||||
#define HAS_NV21TORGB565ROW_SSSE3
|
||||
#define HAS_RAWTOARGBROW_SSSE3
|
||||
#define HAS_RAWTOYROW_SSSE3
|
||||
#define HAS_RGB24TOARGBROW_SSSE3
|
||||
#define HAS_RGB24TOYROW_SSSE3
|
||||
#define HAS_RGB565TOARGBROW_SSE2
|
||||
#define HAS_RGBATOARGBROW_SSSE3
|
||||
#define HAS_RGBATOUVROW_SSSE3
|
||||
#define HAS_RGBATOYROW_SSSE3
|
||||
#define HAS_SETROW_X86
|
||||
#define HAS_SPLITUV_SSE2
|
||||
#define HAS_UYVYTOARGBROW_SSSE3
|
||||
#define HAS_UYVYTOUV422ROW_SSE2
|
||||
#define HAS_UYVYTOUVROW_SSE2
|
||||
#define HAS_UYVYTOYROW_SSE2
|
||||
#define HAS_YTOARGBROW_SSE2
|
||||
#define HAS_YUY2TOARGBROW_SSSE3
|
||||
#define HAS_YUY2TOUV422ROW_SSE2
|
||||
#define HAS_YUY2TOUVROW_SSE2
|
||||
#define HAS_YUY2TOYROW_SSE2
|
||||
#define HAS_I422TOYUY2ROW_SSE2
|
||||
#define HAS_I422TOUYVYROW_SSE2
|
||||
#define HAS_MERGEUV_SSE2
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TORGB565ROW_SSSE3
|
||||
#define HAS_YUY2TOARGBROW_SSSE3
|
||||
#define HAS_UYVYTOARGBROW_SSSE3
|
||||
#define HAS_RGB24TOYROW_SSSE3
|
||||
#define HAS_RAWTOYROW_SSSE3
|
||||
|
||||
// Effects
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
@ -147,68 +147,75 @@ extern "C" {
|
||||
// The following are available on Neon platforms
|
||||
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
#define HAS_ABGRTOARGBROW_NEON
|
||||
#define HAS_ABGRTOUVROW_NEON
|
||||
#define HAS_ABGRTOYROW_NEON
|
||||
#define HAS_ARGB1555TOARGBROW_NEON
|
||||
#define HAS_ARGB1555TOUVROW_NEON
|
||||
#define HAS_ARGB1555TOYROW_NEON
|
||||
#define HAS_ARGB4444TOARGBROW_NEON
|
||||
#define HAS_ARGB4444TOUVROW_NEON
|
||||
#define HAS_ARGB4444TOYROW_NEON
|
||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTORAWROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTORGBAROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_BGRATOARGBROW_NEON
|
||||
#define HAS_BGRATOUVROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_HALFROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_I422TOABGRROW_NEON
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
#define HAS_I422TOBGRAROW_NEON
|
||||
#define HAS_I422TORAWROW_NEON
|
||||
#define HAS_I422TORGB24ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TORGB565ROW_NEON
|
||||
#define HAS_I422TORGBAROW_NEON
|
||||
#define HAS_I422TOUYVYROW_NEON
|
||||
#define HAS_I422TOYUY2ROW_NEON
|
||||
#define HAS_I444TOARGBROW_NEON
|
||||
#define HAS_MERGEUV_NEON
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_MirrorUVRow_NEON
|
||||
#define HAS_NV12TOARGBROW_NEON
|
||||
#define HAS_NV21TOARGBROW_NEON
|
||||
#define HAS_YUY2TOARGBROW_NEON
|
||||
#define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_NV12TORGB565ROW_NEON
|
||||
#define HAS_NV21TOARGBROW_NEON
|
||||
#define HAS_NV21TORGB565ROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
#define HAS_RAWTOUVROW_NEON
|
||||
#define HAS_RAWTOYROW_NEON
|
||||
#define HAS_RGB24TOARGBROW_NEON
|
||||
#define HAS_RGB24TOUVROW_NEON
|
||||
#define HAS_RGB24TOYROW_NEON
|
||||
#define HAS_RGB565TOARGBROW_NEON
|
||||
#define HAS_RGB565TOUVROW_NEON
|
||||
#define HAS_RGB565TOYROW_NEON
|
||||
#define HAS_RGBATOARGBROW_NEON
|
||||
#define HAS_RGBATOUVROW_NEON
|
||||
#define HAS_RGBATOYROW_NEON
|
||||
#define HAS_SETROW_NEON
|
||||
#define HAS_SPLITUV_NEON
|
||||
#define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_UYVYTOUV422ROW_NEON
|
||||
#define HAS_UYVYTOUVROW_NEON
|
||||
#define HAS_UYVYTOYROW_NEON
|
||||
#define HAS_YTOARGBROW_NEON
|
||||
#define HAS_YUY2TOARGBROW_NEON
|
||||
#define HAS_YUY2TOUV422ROW_NEON
|
||||
#define HAS_YUY2TOUVROW_NEON
|
||||
#define HAS_YUY2TOYROW_NEON
|
||||
#define HAS_I422TOYUY2ROW_NEON
|
||||
#define HAS_I422TOUYVYROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||
#define HAS_MERGEUV_NEON
|
||||
#define HAS_YTOARGBROW_NEON
|
||||
#define HAS_I444TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_RGB565TOUVROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_ABGRTOYROW_NEON
|
||||
#define HAS_RGBATOYROW_NEON
|
||||
#define HAS_RGB24TOYROW_NEON
|
||||
#define HAS_RAWTOYROW_NEON
|
||||
#define HAS_RGB565TOARGBROW_NEON
|
||||
#define HAS_ARGB1555TOARGBROW_NEON
|
||||
#define HAS_ARGB4444TOARGBROW_NEON
|
||||
#define HAS_RGB565TOYROW_NEON
|
||||
#define HAS_ARGB1555TOYROW_NEON
|
||||
#define HAS_ARGB4444TOYROW_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Mips platforms
|
||||
@ -357,6 +364,10 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
|
||||
void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
|
||||
void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
|
||||
@ -414,10 +425,22 @@ void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
|
||||
int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
|
||||
@ -428,6 +451,10 @@ void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
@ -1087,6 +1114,10 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_bayer, uint32 selector, int pix);
|
||||
void ARGBToBayerRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_bayer, uint32 selector, int pix);
|
||||
void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb,
|
||||
uint8* dst_bayer, uint32 selector, int pix);
|
||||
void ARGBToBayerRow_Any_NEON(const uint8* src_argb,
|
||||
uint8* dst_bayer, uint32 selector, int pix);
|
||||
|
||||
void I422ToYUY2Row_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 479
|
||||
#define LIBYUV_VERSION 480
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -943,6 +943,9 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
@ -1207,6 +1210,9 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
RGB24ToYRow = RGB24ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToYRow = RGB24ToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
@ -1312,6 +1318,9 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
||||
RAWToYRow = RAWToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToYRow = RAWToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
@ -1482,6 +1491,25 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
|
||||
src_stride_argb1555 = -src_stride_argb1555;
|
||||
}
|
||||
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
|
||||
void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
|
||||
ARGB1555ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else // HAS_ARGB1555TOYROW_NEON
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
|
||||
void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
ARGB1555ToARGBRow_C;
|
||||
@ -1492,15 +1520,7 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGB1555TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1511,20 +1531,6 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
void (*ARGB1555ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGB1555ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1541,13 +1547,14 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
#endif // HAS_ARGB1555TOYROW_NEON
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, width);
|
||||
#else
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
@ -1557,11 +1564,12 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGB1555ToARGBRow_C(src_argb1555, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
#if defined(HAS_ARGB1555TOYROW_NEON)
|
||||
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
#else
|
||||
ARGB1555ToARGBRow(src_argb1555, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
@ -1586,6 +1594,25 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
|
||||
src_stride_argb4444 = -src_stride_argb4444;
|
||||
}
|
||||
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
|
||||
void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
|
||||
ARGB4444ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB4444ToYRow = ARGB4444ToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else // HAS_ARGB4444TOYROW_NEON
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
|
||||
void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
ARGB4444ToARGBRow_C;
|
||||
@ -1596,15 +1623,7 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGB4444TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1615,20 +1634,6 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
void (*ARGB4444ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGB4444ToYRow_C;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB4444ToYRow = ARGB4444ToYRow_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
#if defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
@ -1645,13 +1650,16 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
#endif // HAS_ARGB4444TOYROW_NEON
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kMaxStride, width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
|
||||
ARGB4444ToYRow(src_argb4444, dst_y, width);
|
||||
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, width);
|
||||
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
|
||||
width);
|
||||
#else
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kMaxStride,
|
||||
width);
|
||||
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
|
||||
#endif
|
||||
@ -1661,11 +1669,12 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGB4444ToARGBRow_C(src_argb4444, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
#if defined(HAS_ARGB4444TOYROW_NEON)
|
||||
ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
|
||||
ARGB4444ToYRow(src_argb4444, dst_y, width);
|
||||
#else
|
||||
ARGB4444ToARGBRow(src_argb4444, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -54,6 +54,7 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
#elif defined(HAS_ARGBTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
ARGBToUV444Row = ARGBToUV444Row_NEON;
|
||||
@ -120,6 +121,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV422Row = ARGBToUV422Row_NEON;
|
||||
}
|
||||
@ -173,6 +177,9 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 32) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToUV411Row = ARGBToUV411Row_NEON;
|
||||
}
|
||||
|
||||
@ -72,13 +72,19 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
|
||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4 &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOBAYERROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
const int blue_index = 0; // Offsets for ARGB format
|
||||
@ -398,7 +404,7 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I422TOARGBROW_NEON)
|
||||
@ -408,20 +414,34 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOBAYERROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const int blue_index = 0; // Offsets for ARGB format
|
||||
const int green_index = 1;
|
||||
const int red_index = 2;
|
||||
|
||||
@ -19,6 +19,9 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
|
||||
// TODO(fbarchard): Consider 'any' functions handling odd alignment.
|
||||
|
||||
// YUV to RGB does multiple of 8 with SIMD and remainder with C.
|
||||
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, \
|
||||
@ -114,12 +117,8 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
|
||||
#endif // HAS_NV12TORGB565ROW_NEON
|
||||
#undef NVANY
|
||||
|
||||
// YUY2 to RGB does 8 at a time.
|
||||
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
|
||||
// SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
|
||||
// SSE2 RGB565 is multiple of 4 pixels, ARGB must be aligned to 16 bytes.
|
||||
// NEON RGB24 is multiple of 8 pixels, unaligned source and destination.
|
||||
// I400 To ARGB does multiple of 8 pixels with SIMD and remainder with C.
|
||||
// TODO(fbarchard): RGBANY use last 16 method.
|
||||
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
|
||||
#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
|
||||
void NAMEANY(const uint8* src, \
|
||||
uint8* dst, \
|
||||
@ -165,6 +164,26 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
|
||||
#endif
|
||||
#undef RGBANY
|
||||
|
||||
// ARGB to Bayer: generates NAMEANY, a row function that accepts any width.
// The leading (width & ~MASK) pixels are passed to ARGBTORGB_SIMD and the
// trailing (width & MASK) pixels are passed to ARGBTORGB_C.
// SBPP scales the pixel offset applied to src, BPP the offset applied to dst.
#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)        \
    void NAMEANY(const uint8* src,                                             \
                 uint8* dst, uint32 selector,                                  \
                 int width) {                                                  \
      int simd_pixels = width & ~MASK;                                         \
      int tail_pixels = width & MASK;                                          \
      ARGBTORGB_SIMD(src, dst, selector, simd_pixels);                         \
      ARGBTORGB_C(src + simd_pixels * SBPP, dst + simd_pixels * BPP,           \
                  selector, tail_pixels);                                      \
    }
|
||||
|
||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
|
||||
3, 4, 1)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOBAYERROW_NEON)
|
||||
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
|
||||
3, 4, 1)
|
||||
#endif
|
||||
#undef BAYERANY
|
||||
|
||||
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
|
||||
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
|
||||
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \
|
||||
@ -230,37 +249,43 @@ UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
||||
#ifdef HAS_ARGBTOUVROW_NEON
|
||||
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4)
|
||||
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2)
|
||||
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2)
|
||||
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2)
|
||||
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2)
|
||||
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
|
||||
#endif
|
||||
#undef UVANY
|
||||
|
||||
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP) \
|
||||
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
|
||||
void NAMEANY(const uint8* src_uv, \
|
||||
uint8* dst_u, uint8* dst_v, int width) { \
|
||||
int n = width & ~15; \
|
||||
int n = width & ~MASK; \
|
||||
ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
|
||||
ANYTOUV_C(src_uv + n * BPP, \
|
||||
dst_u + (n >> 1), \
|
||||
dst_v + (n >> 1), \
|
||||
width & 15); \
|
||||
dst_u + (n >> SHIFT), \
|
||||
dst_v + (n >> SHIFT), \
|
||||
width & MASK); \
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
|
||||
ARGBToUV422Row_C, 4)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_SSE2
|
||||
ARGBToUV422Row_C, 4, 15, 1)
|
||||
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
|
||||
YUY2ToUV422Row_C, 2)
|
||||
YUY2ToUV422Row_C, 2, 15, 1)
|
||||
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
|
||||
UYVYToUV422Row_C, 2)
|
||||
UYVYToUV422Row_C, 2, 15, 1)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_NEON
|
||||
// ARGBToUV444Row_NEON is selected by callers when width is a multiple of 8,
// so the remainder mask is 7. (A mask of 8 would only clear/test bit 3 of
// width, handing the SIMD row a count that is not a multiple of 8 and
// dropping the tail pixels.)
UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
         ARGBToUV444Row_C, 4, 7, 0)
|
||||
UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
|
||||
ARGBToUV422Row_C, 4, 15, 1)
|
||||
UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
|
||||
ARGBToUV411Row_C, 4, 31, 2)
|
||||
UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
|
||||
YUY2ToUV422Row_C, 2)
|
||||
YUY2ToUV422Row_C, 2, 15, 1)
|
||||
UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
|
||||
UYVYToUV422Row_C, 2)
|
||||
UYVYToUV422Row_C, 2, 15, 1)
|
||||
#endif
|
||||
#undef UV422ANY
|
||||
|
||||
|
||||
@ -350,7 +350,7 @@ void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
|
||||
}
|
||||
|
||||
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
uint8 b0 = src_rgb565[0] & 0x1f;
|
||||
@ -365,13 +365,13 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8 b3 = next_rgb565[2] & 0x1f;
|
||||
uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
|
||||
uint8 r3 = next_rgb565[3] >> 3;
|
||||
uint8 ab = (b0 + b1 + b2 + b3);
|
||||
uint8 ag = (g0 + g1 + g2 + g3);
|
||||
uint8 ar = (r0 + r1 + r2 + r3);
|
||||
ab = (ab << 1) | (ab >> 6);
|
||||
ar = (ar << 1) | (ar >> 6);
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
|
||||
uint8 g = (g0 + g1 + g2 + g3);
|
||||
uint8 r = (r0 + r1 + r2 + r3);
|
||||
b = (b << 1) | (b >> 6); // 787 -> 888.
|
||||
r = (r << 1) | (r >> 6);
|
||||
dst_u[0] = RGBToU(r, g, b);
|
||||
dst_v[0] = RGBToV(r, g, b);
|
||||
src_rgb565 += 4;
|
||||
next_rgb565 += 4;
|
||||
dst_u += 1;
|
||||
@ -384,14 +384,108 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8 b2 = next_rgb565[0] & 0x1f;
|
||||
uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
|
||||
uint8 r2 = next_rgb565[1] >> 3;
|
||||
uint8 ab = (b0 + b2);
|
||||
uint8 ag = (g0 + g2);
|
||||
uint8 ar = (r0 + r2);
|
||||
ab = (ab << 2) | (ab >> 4);
|
||||
ag = (ag << 1) | (ag >> 6);
|
||||
ar = (ar << 2) | (ar >> 4);
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
uint8 b = (b0 + b2); // 565 * 2 = 676.
|
||||
uint8 g = (g0 + g2);
|
||||
uint8 r = (r0 + r2);
|
||||
b = (b << 2) | (b >> 4); // 676 -> 888
|
||||
g = (g << 1) | (g >> 6);
|
||||
r = (r << 2) | (r >> 4);
|
||||
dst_u[0] = RGBToU(r, g, b);
|
||||
dst_v[0] = RGBToV(r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
uint8 b0 = src_argb1555[0] & 0x1f;
|
||||
uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
|
||||
uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
|
||||
uint8 b1 = src_argb1555[2] & 0x1f;
|
||||
uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
|
||||
uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
|
||||
uint8 b2 = next_argb1555[0] & 0x1f;
|
||||
uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
|
||||
uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
|
||||
uint8 b3 = next_argb1555[2] & 0x1f;
|
||||
uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
|
||||
uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
|
||||
uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
|
||||
uint8 g = (g0 + g1 + g2 + g3);
|
||||
uint8 r = (r0 + r1 + r2 + r3);
|
||||
b = (b << 1) | (b >> 6); // 777 -> 888.
|
||||
g = (g << 1) | (g >> 6);
|
||||
r = (r << 1) | (r >> 6);
|
||||
dst_u[0] = RGBToU(r, g, b);
|
||||
dst_v[0] = RGBToV(r, g, b);
|
||||
src_argb1555 += 4;
|
||||
next_argb1555 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
uint8 b0 = src_argb1555[0] & 0x1f;
|
||||
uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
|
||||
uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
|
||||
uint8 b2 = next_argb1555[0] & 0x1f;
|
||||
uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
|
||||
uint8 r2 = next_argb1555[1] >> 3;
|
||||
uint8 b = (b0 + b2); // 555 * 2 = 666.
|
||||
uint8 g = (g0 + g2);
|
||||
uint8 r = (r0 + r2);
|
||||
b = (b << 2) | (b >> 4); // 666 -> 888.
|
||||
g = (g << 2) | (g >> 4);
|
||||
r = (r << 2) | (r >> 4);
|
||||
dst_u[0] = RGBToU(r, g, b);
|
||||
dst_v[0] = RGBToV(r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
uint8 b0 = src_argb4444[0] & 0x0f;
|
||||
uint8 g0 = src_argb4444[0] >> 4;
|
||||
uint8 r0 = src_argb4444[1] & 0x0f;
|
||||
uint8 b1 = src_argb4444[2] & 0x0f;
|
||||
uint8 g1 = src_argb4444[2] >> 4;
|
||||
uint8 r1 = src_argb4444[3] & 0x0f;
|
||||
uint8 b2 = next_argb4444[0] & 0x0f;
|
||||
uint8 g2 = next_argb4444[0] >> 4;
|
||||
uint8 r2 = next_argb4444[1] & 0x0f;
|
||||
uint8 b3 = next_argb4444[2] & 0x0f;
|
||||
uint8 g3 = next_argb4444[2] >> 4;
|
||||
uint8 r3 = next_argb4444[3] & 0x0f;
|
||||
uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
|
||||
uint8 g = (g0 + g1 + g2 + g3);
|
||||
uint8 r = (r0 + r1 + r2 + r3);
|
||||
b = (b << 2) | (b >> 4); // 666 -> 888.
|
||||
g = (g << 2) | (g >> 4);
|
||||
r = (r << 2) | (r >> 4);
|
||||
dst_u[0] = RGBToU(r, g, b);
|
||||
dst_v[0] = RGBToV(r, g, b);
|
||||
src_argb4444 += 4;
|
||||
next_argb4444 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
if (width & 1) {
|
||||
uint8 b0 = src_argb4444[0] & 0x0f;
|
||||
uint8 g0 = src_argb4444[0] >> 4;
|
||||
uint8 r0 = src_argb4444[1] & 0x0f;
|
||||
uint8 b2 = next_argb4444[0] & 0x0f;
|
||||
uint8 g2 = next_argb4444[0] >> 4;
|
||||
uint8 r2 = next_argb4444[1] & 0x0f;
|
||||
uint8 b = (b0 + b2); // 444 * 2 = 555.
|
||||
uint8 g = (g0 + g2);
|
||||
uint8 r = (r0 + r2);
|
||||
b = (b << 3) | (b >> 2); // 555 -> 888.
|
||||
g = (g << 3) | (g >> 2);
|
||||
r = (r << 3) | (r >> 2);
|
||||
dst_u[0] = RGBToU(r, g, b);
|
||||
dst_v[0] = RGBToV(r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -799,10 +893,10 @@ void I422ToARGB1555Row_C(const uint8* src_y,
|
||||
}
|
||||
|
||||
void I422ToRGB565Row_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
int width) {
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
int width) {
|
||||
uint8 b0;
|
||||
uint8 g0;
|
||||
uint8 r0;
|
||||
|
||||
@ -1159,6 +1159,19 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
|
||||
"vorr.u8 q1, q1, q3 \n" /* R,A */ \
|
||||
"vorr.u8 q0, q0, q2 \n" /* B,G */ \
|
||||
|
||||
// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
|
||||
// Asm fragment for pasting into an inline-asm string.
// In:      q0 = 8 pixels of 16 bits each.
// Out:     d0 = 8 x B, d1 = 8 x G, d2 = 8 x R. Each 5-bit channel is widened
//          to 8 bits by repeating its top 3 bits in the low 3 bits.
// Scratch: q2 (d4, d5) and d6 are overwritten.
#define RGB555TOARGB \
|
||||
"vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 = low bytes xxxBBBBB, d1 = high bytes xRRRRRxx */ \
|
||||
"vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
|
||||
"vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
|
||||
"vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
|
||||
"vorr.u8 d0, d0, d4 \n" /* B */ \
|
||||
"vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
|
||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||
|
||||
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
|
||||
int pix) {
|
||||
asm volatile (
|
||||
@ -1715,6 +1728,19 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
|
||||
// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
|
||||
#ifdef HAS_ARGBTOUVROW_NEON
|
||||
|
||||
// Asm fragment for pasting into an inline-asm string.
// In:      q0 = B, q1 = G, q2 = R as 16-bit lanes; q10-q14 = coefficients and
//          q15 = bias, all of which must already be loaded by the caller.
// Out:     d0 = 8 x U, d1 = 8 x V (16-bit results shifted right by 8 and
//          narrowed with saturation).
// Scratch: q8 and q9 are overwritten.
#define RGBTOUV \
|
||||
"vmul.s16 q8, q0, q10 \n" /* B */ \
|
||||
"vmls.s16 q8, q1, q11 \n" /* G */ \
|
||||
"vmls.s16 q8, q2, q12 \n" /* R */ \
|
||||
"vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
|
||||
"vmul.s16 q9, q2, q10 \n" /* R */ \
|
||||
"vmls.s16 q9, q1, q14 \n" /* G */ \
|
||||
"vmls.s16 q9, q0, q13 \n" /* B */ \
|
||||
"vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
|
||||
"vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
|
||||
"vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
|
||||
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
@ -1738,16 +1764,7 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
"subs %4, %4, #16 \n" // 32 processed per loop.
|
||||
"vmul.s16 q8, q0, q10 \n" // B
|
||||
"vmls.s16 q8, q1, q11 \n" // G
|
||||
"vmls.s16 q8, q2, q12 \n" // R
|
||||
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
|
||||
"vmul.s16 q9, q2, q10 \n" // R
|
||||
"vmls.s16 q9, q1, q14 \n" // G
|
||||
"vmls.s16 q9, q0, q13 \n" // B
|
||||
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
|
||||
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
|
||||
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
|
||||
RGBTOUV
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
@ -1763,7 +1780,6 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
#endif // HAS_ARGBTOUVROW_NEON
|
||||
|
||||
|
||||
// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
|
||||
#ifdef HAS_RGB565TOUVROW_NEON
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
@ -1824,7 +1840,131 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGBTOUVROW_NEON
|
||||
#endif // HAS_RGB565TOUVROW_NEON
|
||||
|
||||
// 16x2 pixels -> 8x1. pix is the number of ARGB1555 pixels per row, e.g. 16.
|
||||
#ifdef HAS_ARGB1555TOUVROW_NEON
|
||||
// NEON: converts two adjacent rows of ARGB1555 into one row of 2x2
// subsampled U and V.
// Each loop iteration reads 16 pixels (32 bytes) from each of the two rows
// and stores 8 U bytes and 8 V bytes. pix is decremented by 16 per iteration
// and only tested at the bottom of the loop, so the body always runs at
// least once.
// RGB555TOARGB widens every channel to 8 bits and leaves B, G, R in
// d0, d1, d2; the pairwise adds below build per-channel sums over each
// 2x2 block in q4 (B), q5 (G) and q6 (R).
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src + stride: address of the second row.
|
||||
// Coefficients are written as N / 4; each 16-bit lane they multiply holds
// the sum of four 8-bit samples (2 pixels x 2 rows).
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point (result is >> 8).
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels from the first row.
|
||||
RGB555TOARGB
|
||||
"vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts (pairwise add).
|
||||
"vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
"vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels from the first row.
|
||||
RGB555TOARGB
|
||||
"vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels from the second row.
|
||||
RGB555TOARGB
|
||||
"vpadal.u8 d8, d0 \n" // B: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d10, d1 \n" // G: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d12, d2 \n" // R: add second-row pairs into the 4 shorts.
|
||||
"vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels from the second row.
|
||||
RGB555TOARGB
|
||||
"vpadal.u8 d9, d0 \n" // B: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d11, d1 \n" // G: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d13, d2 \n" // R: add second-row pairs into the 4 shorts.
|
||||
|
||||
"subs %4, %4, #16 \n" // 16 pixels per row processed per loop.
|
||||
"vmul.s16 q8, q4, q10 \n" // B
|
||||
"vmls.s16 q8, q5, q11 \n" // G
|
||||
"vmls.s16 q8, q6, q12 \n" // R
|
||||
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
|
||||
"vmul.s16 q9, q6, q10 \n" // R
|
||||
"vmls.s16 q9, q5, q14 \n" // G
|
||||
"vmls.s16 q9, q4, q13 \n" // B
|
||||
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
|
||||
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
|
||||
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb1555), // %0
|
||||
"+r"(src_stride_argb1555), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGB1555TOUVROW_NEON
|
||||
|
||||
// 16x2 pixels -> 8x1. pix is the number of ARGB4444 pixels per row, e.g. 16.
|
||||
#ifdef HAS_ARGB4444TOUVROW_NEON
|
||||
// NEON: converts two adjacent rows of ARGB4444 into one row of 2x2
// subsampled U and V.
// Each loop iteration reads 16 pixels (32 bytes) from each of the two rows
// and stores 8 U bytes and 8 V bytes. pix is decremented by 16 per iteration
// and only tested at the bottom of the loop, so the body always runs at
// least once.
// NOTE(review): ARGB4444TOARGB is defined outside this view; the pairwise
// adds below assume it leaves 8-bit B, G, R in d0, d1, d2 - confirm.
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = src + stride: address of the second row.
|
||||
// Coefficients are written as N / 4; each 16-bit lane they multiply holds
// the sum of four byte samples (2 pixels x 2 rows).
"vmov.s16 q10, #112 / 4 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 4 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 4 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 4 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 4 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point (result is >> 8).
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels from the first row.
|
||||
ARGB4444TOARGB
|
||||
"vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts (pairwise add).
|
||||
"vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
"vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels from the first row.
|
||||
ARGB4444TOARGB
|
||||
"vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels from the second row.
|
||||
ARGB4444TOARGB
|
||||
"vpadal.u8 d8, d0 \n" // B: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d10, d1 \n" // G: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d12, d2 \n" // R: add second-row pairs into the 4 shorts.
|
||||
"vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels from the second row.
|
||||
ARGB4444TOARGB
|
||||
"vpadal.u8 d9, d0 \n" // B: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d11, d1 \n" // G: add second-row pairs into the 4 shorts.
|
||||
"vpadal.u8 d13, d2 \n" // R: add second-row pairs into the 4 shorts.
|
||||
|
||||
"subs %4, %4, #16 \n" // 16 pixels per row processed per loop.
|
||||
"vmul.s16 q8, q4, q10 \n" // B
|
||||
"vmls.s16 q8, q5, q11 \n" // G
|
||||
"vmls.s16 q8, q6, q12 \n" // R
|
||||
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
|
||||
"vmul.s16 q9, q6, q10 \n" // R
|
||||
"vmls.s16 q9, q5, q14 \n" // G
|
||||
"vmls.s16 q9, q4, q13 \n" // B
|
||||
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
|
||||
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
|
||||
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb4444), // %0
|
||||
"+r"(src_stride_argb4444), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#endif // HAS_ARGB4444TOUVROW_NEON
|
||||
|
||||
#ifdef HAS_RGB565TOYROW_NEON
|
||||
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
|
||||
|
||||
@ -612,8 +612,9 @@ TESTATOPLANAR(RGBA, 4, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RAW, 3, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB565, 2, I420, 2, 2, 5)
|
||||
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 4)
|
||||
// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
|
||||
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 15)
|
||||
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 17)
|
||||
TESTATOPLANAR(ARGB, 4, I411, 4, 1, 4)
|
||||
TESTATOPLANAR(ARGB, 4, I422, 2, 1, 2)
|
||||
TESTATOPLANAR(ARGB, 4, I444, 1, 1, 2)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user