RGB formats converted to YUV with Neon

BUG=none
TEST=convert_test
Review URL: https://webrtc-codereview.appspot.com/936013

git-svn-id: http://libyuv.googlecode.com/svn/trunk@471 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2012-11-05 23:40:11 +00:00
parent d8427fd50a
commit bdf7cb5914
20 changed files with 2528 additions and 1279 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 470
Version: 471
License: BSD
License File: LICENSE

View File

@ -22,22 +22,9 @@ namespace libyuv {
extern "C" {
#endif
// Alias.
#define I420ToI420 I420Copy
// Copy I420 to I420.
// Convert I444 to I420.
LIBYUV_API
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
LIBYUV_API
int I422ToI420(const uint8* src_y, int src_stride_y,
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
@ -45,9 +32,9 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I444 to I420.
// Convert I422 to I420.
LIBYUV_API
int I444ToI420(const uint8* src_y, int src_stride_y,
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
@ -65,6 +52,17 @@ int I411ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy I420 to I420.
// I420ToI420 is a macro alias: it expands to I420Copy, so both names refer to
// the same function.
#define I420ToI420 I420Copy
// Parameters: source Y, U and V pointers, each paired with its own stride;
// destination Y, U and V pointers, each paired with its own stride; then the
// image width and height.
// NOTE(review): the return-value convention is not visible in this
// declaration - confirm against the definition.
LIBYUV_API
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8* src_y, int src_stride_y,
@ -91,6 +89,22 @@ int NV21ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
// Parameters: a single source pointer (src_yuy2) with its stride; destination
// Y, U and V pointers, each paired with its own stride; then the image width
// and height.
// NOTE(review): the return-value convention is not visible in this
// declaration - confirm against the definition.
LIBYUV_API
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
// Parameters: a single source pointer (src_uyvy) with its stride; destination
// Y, U and V pointers, each paired with its own stride; then the image width
// and height.
// NOTE(review): the return-value convention is not visible in this
// declaration - confirm against the definition.
LIBYUV_API
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8* src_m420, int src_stride_m420,
@ -108,22 +122,6 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
LIBYUV_API
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert V210 to I420.
LIBYUV_API
int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy,

View File

@ -18,15 +18,27 @@ namespace libyuv {
extern "C" {
#endif
// Alias.
#define ARGBToARGB ARGBCopy
// Copy ARGB to ARGB.
#define ARGBToARGB ARGBCopy
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To BGRA. (alias)
// ARGBToBGRA is a macro that expands to BGRAToARGB, so the one function
// declared below serves both conversion directions.
// NOTE(review): presumably valid because the channel reordering is its own
// inverse - confirm against the row implementation.
#define ARGBToBGRA BGRAToARGB
// Parameters: source pointer (src_frame) with its stride, destination pointer
// (dst_argb) with its stride, then the image width and height.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To ABGR. (alias)
// ARGBToABGR is a macro that expands to ABGRToARGB, so the one function
// declared below serves both conversion directions.
// NOTE(review): presumably valid because the channel reordering is its own
// inverse - confirm against the row implementation.
#define ARGBToABGR ABGRToARGB
// Parameters: source pointer (src_frame) with its stride, destination pointer
// (dst_argb) with its stride, then the image width and height.
LIBYUV_API
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8* src_frame, int src_stride_frame,
@ -63,34 +75,75 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height);
// Convert ARGB To I444.
// Parameters: source ARGB pointer with its stride; destination Y, U and V
// pointers, each paired with its own stride; then the image width and height.
// NOTE(review): the return-value convention is not visible in this
// declaration - confirm against the definition.
LIBYUV_API
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I422.
// Parameters: source ARGB pointer with its stride; destination Y, U and V
// pointers, each paired with its own stride; then the image width and height.
// NOTE(review): the return-value convention is not visible in this
// declaration - confirm against the definition.
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I420. (also in convert.h)
// The same prototype is declared in convert.h; the two declarations must be
// kept in sync.
// Parameters: source ARGB pointer with its stride; destination Y, U and V
// pointers, each paired with its own stride; then the image width and height.
LIBYUV_API
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I411.
// Parameters: source ARGB pointer with its stride; destination Y, U and V
// pointers, each paired with its own stride; then the image width and height.
// NOTE(review): the return-value convention is not visible in this
// declaration - confirm against the definition.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to I400.
// Only a Y destination is taken; unlike the other ARGBToI4xx declarations
// there are no U or V output parameters.
// Parameters: source ARGB pointer with its stride, destination Y pointer with
// its stride, then the image width and height.
LIBYUV_API
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// ARGB little endian (bgra in memory) to I422.
// Convert ARGB To NV12.
LIBYUV_API
int ARGBToI422(const uint8* src_frame, int src_stride_frame,
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Aliases.
#define ARGBToBGRA BGRAToARGB
#define ARGBToABGR ABGRToARGB
// BGRA little endian (argb in memory) to ARGB.
// Convert ARGB To NV21.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
// ABGR little endian (rgba in memory) to ARGB.
// Convert ARGB To NV21.
LIBYUV_API
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
// Convert ARGB To YUY2.
// The output is written through a single destination pointer (dst_yuy2)
// rather than separate Y/U/V pointers.
// Parameters: source ARGB pointer with its stride, destination pointer with
// its stride, then the image width and height.
LIBYUV_API
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height);
// Convert ARGB To UYVY.
// The output is written through a single destination pointer (dst_uyvy)
// rather than separate Y/U/V pointers.
// Parameters: source ARGB pointer with its stride, destination pointer with
// its stride, then the image width and height.
LIBYUV_API
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height);
#ifdef __cplusplus

File diff suppressed because it is too large Load Diff

View File

@ -66,8 +66,8 @@ int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
// Legacy API. Deprecated.
LIBYUV_API
int ScaleOffset(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_yoffset,
int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
bool interpolate);
// For testing, allow disabling of specialized scalers.

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 470
#define LIBYUV_VERSION 471
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -533,11 +533,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
YUY2ToYRow_C;
#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
}
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
@ -550,12 +548,10 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
}
}
#elif defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width > 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width >= 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
YUY2ToYRow = YUY2ToYRow_NEON;
@ -656,11 +652,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
YUY2ToYRow = YUY2ToYRow_C;
YUY2ToUVRow = YUY2ToUVRow_C;
#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
}
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
@ -673,12 +667,10 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
}
}
#elif defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width > 16) {
YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width >= 16) {
YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
YUY2ToYRow = YUY2ToYRow_NEON;
@ -723,11 +715,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
UYVYToYRow = UYVYToYRow_C;
UYVYToUVRow = UYVYToUVRow_C;
#if defined(HAS_UYVYTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
}
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
@ -740,12 +730,10 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
}
}
#elif defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width >= 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON;
@ -827,10 +815,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 2 * 2 > kMaxStride) { // 2 rows of UYVY are required.
return -1;
} else if (!src_v210 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
if (!src_v210 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0 ||
width * 2 * 2 > kMaxStride) {
return -1;
}
// Negative height means invert the image.
@ -858,12 +845,10 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
}
}
#elif defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width >= 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON;
@ -873,11 +858,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
#endif
#if defined(HAS_UYVYTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
}
if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
UYVYToUVRow = UYVYToUVRow_SSE2;
@ -887,12 +870,10 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
}
}
#elif defined(HAS_UYVYTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width > 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
UYVYToYRow = UYVYToYRow_Any_NEON;
if (width >= 16) {
UYVYToUVRow = UYVYToUVRow_Any_NEON;
}
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_NEON;
@ -920,6 +901,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
return 0;
}
// Convert ARGB to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
@ -942,11 +924,9 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
@ -959,10 +939,8 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
@ -985,6 +963,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
return 0;
}
// Convert BGRA to I420.
LIBYUV_API
int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_y, int dst_stride_y,
@ -1002,18 +981,14 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
src_bgra = src_bgra + (height - 1) * src_stride_bgra;
src_stride_bgra = -src_stride_bgra;
}
void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix);
void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
uint8* dst_u, uint8* dst_v, int width);
BGRAToYRow = BGRAToYRow_C;
BGRAToUVRow = BGRAToUVRow_C;
uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
BGRAToYRow_C;
#if defined(HAS_BGRATOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
BGRAToYRow = BGRAToYRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
BGRAToYRow = BGRAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
@ -1025,6 +1000,13 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
}
}
}
#elif defined(HAS_BGRATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
BGRAToYRow = BGRAToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_NEON;
}
}
#endif
for (int y = 0; y < height - 1; y += 2) {
@ -1043,6 +1025,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
return 0;
}
// Convert ABGR to I420.
LIBYUV_API
int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_y, int dst_stride_y,
@ -1060,18 +1043,14 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix);
void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width);
ABGRToYRow = ABGRToYRow_C;
ABGRToUVRow = ABGRToUVRow_C;
uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
ABGRToYRow_C;
#if defined(HAS_ABGRTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
@ -1083,6 +1062,13 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
}
}
}
#elif defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ABGRToYRow = ABGRToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_NEON;
}
}
#endif
for (int y = 0; y < height - 1; y += 2) {
@ -1101,6 +1087,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
return 0;
}
// Convert RGBA to I420.
LIBYUV_API
int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_y, int dst_stride_y,
@ -1118,18 +1105,14 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
src_rgba = src_rgba + (height - 1) * src_stride_rgba;
src_stride_rgba = -src_stride_rgba;
}
void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix);
void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
RGBAToYRow = RGBAToYRow_C;
RGBAToUVRow = RGBAToUVRow_C;
uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
RGBAToYRow_C;
#if defined(HAS_RGBATOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
RGBAToYRow = RGBAToYRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
RGBAToYRow = RGBAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
@ -1141,6 +1124,13 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
}
}
}
#elif defined(HAS_RGBATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGBAToYRow = RGBAToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_NEON;
}
}
#endif
for (int y = 0; y < height - 1; y += 2) {
@ -1159,18 +1149,17 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
return 0;
}
// Convert RGB24 to I420.
LIBYUV_API
int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 4 > kMaxStride) { // Row buffer is required.
if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0 ||
width * 4 > kMaxStride) {
return -1;
} else if (!src_rgb24 ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
@ -1179,44 +1168,71 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
src_stride_rgb24 = -src_stride_rgb24;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
RGB24ToARGBRow = RGB24ToARGBRow_C;
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RGB24ToARGBRow_C;
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
TestReadSafe(src_rgb24, src_stride_rgb24, width, height, 3, 48)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#elif defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
ARGBToYRow = ARGBToYRow_C;
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_RGB24TOYROW_NEON)
void (*RGB24ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
RGB24ToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB24ToYRow = RGB24ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToYRow = RGB24ToYRow_NEON;
}
}
#else
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#endif
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB24TOYROW_NEON
for (int y = 0; y < height - 1; y += 2) {
RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
#if defined(HAS_RGB24TOYROW_NEON)
RGB24ToYRow(src_rgb24, dst_y, width);
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
@ -1225,23 +1241,27 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
if (height & 1) {
RGB24ToARGBRow_C(src_rgb24, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
#if defined(HAS_RGB24TOYROW_NEON)
RGB24ToYRow(src_rgb24, dst_y, width);
#else
ARGBToYRow(row, dst_y, width);
#endif
}
return 0;
}
// Convert RAW to I420.
// Same as RGB24ToI420, except that the source channel order differs (RGB vs BGR).
LIBYUV_API
int RAWToI420(const uint8* src_raw, int src_stride_raw,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 4 > kMaxStride) { // Row buffer is required.
if (!src_raw || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0 ||
width * 4 > kMaxStride) {
return -1;
} else if (!src_raw ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
@ -1250,44 +1270,71 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
src_stride_raw = -src_stride_raw;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
RAWToARGBRow = RAWToARGBRow_C;
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RAWToARGBRow_C;
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
TestReadSafe(src_raw, src_stride_raw, width, height, 3, 48)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#elif defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
ARGBToYRow = ARGBToYRow_C;
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_RAWTOYROW_NEON)
void (*RAWToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
RAWToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RAWToYRow = RAWToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToYRow = RAWToYRow_NEON;
}
}
#else
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#endif
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RAWTOYROW_NEON
for (int y = 0; y < height - 1; y += 2) {
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
#if defined(HAS_RAWTOYROW_NEON)
RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
@ -1296,22 +1343,25 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
if (height & 1) {
RAWToARGBRow_C(src_raw, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
#if defined(HAS_RAWTOYROW_NEON)
RAWToYRow(src_raw, dst_y, width);
#else
ARGBToYRow(row, dst_y, width);
#endif
}
return 0;
}
// Convert RGB565 to I420.
LIBYUV_API
int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 4 > kMaxStride) { // Row buffer is required.
return -1;
} else if (!src_rgb565 ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0 ||
width * 4 > kMaxStride) {
return -1;
}
// Negative height means invert the image.
@ -1321,44 +1371,71 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
src_stride_rgb565 = -src_stride_rgb565;
}
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix);
RGB565ToARGBRow = RGB565ToARGBRow_C;
void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RGB565ToARGBRow_C;
#if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
TestReadSafe(src_rgb565, src_stride_rgb565, width, height, 2, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
}
}
#elif defined(HAS_RGB565TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_NEON;
}
}
#endif
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
ARGBToYRow = ARGBToYRow_C;
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
ARGBToYRow = ARGBToYRow_Any_SSSE3;
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
}
}
#endif
#if defined(HAS_RGB565TOYROW_NEON)
void (*RGB565ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
RGB565ToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB565ToYRow = RGB565ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToYRow = RGB565ToYRow_NEON;
}
}
#else
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#endif
#endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB565TOYROW_NEON
for (int y = 0; y < height - 1; y += 2) {
RGB565ToARGBRow(src_rgb565, row, width);
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
#if defined(HAS_RGB565TOYROW_NEON)
RGB565ToYRow(src_rgb565, dst_y, width);
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
#else
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
#endif
src_rgb565 += src_stride_rgb565 * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
@ -1367,7 +1444,11 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
if (height & 1) {
RGB565ToARGBRow_C(src_rgb565, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
#if defined(HAS_RGB565TOYROW_NEON)
RGB565ToYRow(src_rgb565, dst_y, width);
#else
ARGBToYRow(row, dst_y, width);
#endif
}
return 0;
}
@ -1378,12 +1459,10 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 4 > kMaxStride) { // Row buffer is required.
if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0 ||
width * 4 > kMaxStride) {
return -1;
} else if (!src_argb1555 ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
@ -1409,10 +1488,8 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
ARGBToYRow = ARGBToYRow_C;
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
@ -1450,12 +1527,10 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 4 > kMaxStride) { // Row buffer is required.
if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0 ||
width * 4 > kMaxStride) {
return -1;
} else if (!src_argb4444 ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
@ -1481,10 +1556,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
ARGBToYRow = ARGBToYRow_C;
ARGBToUVRow = ARGBToUVRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;

View File

@ -405,8 +405,8 @@ int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
// Convert RGB24 to ARGB.
LIBYUV_API
int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_rgb24 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -417,16 +417,22 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
void (*RGB24ToARGBRow)(const uint8* src_rgb24, uint8* dst_argb, int pix) =
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RGB24ToARGBRow_C;
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#elif defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
@ -441,8 +447,8 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
// Convert RAW to ARGB.
LIBYUV_API
int RAWToARGB(const uint8* src_raw, int src_stride_raw,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_raw || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -453,16 +459,22 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
void (*RAWToARGBRow)(const uint8* src_raw, uint8* dst_argb, int pix) =
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RAWToARGBRow_C;
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#elif defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
@ -492,10 +504,19 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
RGB565ToARGBRow_C;
#if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 8) &&
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
}
}
#elif defined(HAS_RGB565TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_NEON;
}
}
#endif

View File

@ -265,25 +265,25 @@ LIBYUV_API
int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height) {
if (!src_y || !src_u || !src_v || !dst_frame ||
if (!src_y || !src_u || !src_v || !dst_yuy2 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2;
}
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
const uint8* src_v, uint8* dst_yuy2, int width) =
I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
@ -299,11 +299,11 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I422ToYUY2Row(src_y, src_u, src_y, dst_frame, width);
I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame;
dst_yuy2 += dst_stride_yuy2;
}
return 0;
}
@ -312,25 +312,25 @@ LIBYUV_API
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height) {
if (!src_y || !src_u || !src_v || !dst_frame ||
if (!src_y || !src_u || !src_v || !dst_yuy2 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2;
}
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
const uint8* src_v, uint8* dst_yuy2, int width) =
I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
@ -346,16 +346,16 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height - 1; y += 2) {
I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
dst_yuy2 + dst_stride_yuy2, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
dst_yuy2 += dst_stride_yuy2 * 2;
}
if (height & 1) {
I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
}
return 0;
}
@ -365,25 +365,25 @@ LIBYUV_API
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height) {
if (!src_y || !src_u || !src_v || !dst_frame ||
if (!src_y || !src_u || !src_v || !dst_uyvy ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
dst_stride_uyvy = -dst_stride_uyvy;
}
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
const uint8* src_v, uint8* dst_uyvy, int width) =
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
@ -399,11 +399,11 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I422ToUYVYRow(src_y, src_u, src_y, dst_frame, width);
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame;
dst_uyvy += dst_stride_uyvy;
}
return 0;
}
@ -412,25 +412,25 @@ LIBYUV_API
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height) {
if (!src_y || !src_u || !src_v || !dst_frame ||
if (!src_y || !src_u || !src_v || !dst_uyvy ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
dst_stride_uyvy = -dst_stride_uyvy;
}
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width) =
const uint8* src_v, uint8* dst_uyvy, int width) =
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
@ -446,16 +446,16 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height - 1; y += 2) {
I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
dst_uyvy + dst_stride_uyvy, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
dst_uyvy += dst_stride_uyvy * 2;
}
if (height & 1) {
I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
}
return 0;
}
@ -464,35 +464,35 @@ LIBYUV_API
int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_v210, int dst_stride_v210,
int width, int height) {
if (width * 16 / 6 > kMaxStride ||
!src_y || !src_u || !src_v || !dst_frame ||
!src_y || !src_u || !src_v || !dst_v210 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
dst_v210 = dst_v210 + (height - 1) * dst_stride_v210;
dst_stride_v210 = -dst_stride_v210;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
for (int y = 0; y < height - 1; y += 2) {
I422ToUYVYRow_C(src_y, src_u, src_v, row, width);
UYVYToV210Row_C(row, dst_frame, width);
UYVYToV210Row_C(row, dst_v210, width);
I422ToUYVYRow_C(src_y + src_stride_y, src_u, src_v, row, width);
UYVYToV210Row_C(row, dst_frame + dst_stride_frame, width);
UYVYToV210Row_C(row, dst_v210 + dst_stride_v210, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
dst_v210 += dst_stride_v210 * 2;
}
if (height & 1) {
I422ToUYVYRow_C(src_y, src_u, src_v, row, width);
UYVYToV210Row_C(row, dst_frame, width);
UYVYToV210Row_C(row, dst_v210, width);
}
return 0;
}
@ -521,7 +521,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C;
#if defined(HAS_SPLITUV_SSE2)
#if defined(HAS_MERGEUV_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
@ -534,7 +534,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_SPLITUV_AVX2)
#if defined(HAS_MERGEUV_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
@ -547,7 +547,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
}
}
#endif
#if defined(HAS_SPLITUV_NEON)
#if defined(HAS_MERGEUV_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {

View File

@ -21,6 +21,522 @@ namespace libyuv {
extern "C" {
#endif
// ARGB little endian (bgra in memory) to I444.
// Every source row produces one row of Y plus one row each of U and V
// (chroma rows come from ARGBToUV444Row_C; only the Y row has SIMD variants).
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// or height is zero.
LIBYUV_API
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  if (!src_argb || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  // A negative height asks for a vertical flip: begin at the last source row
  // and step backwards through the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Choose the Y row converter; the portable C version is the fallback.
  void (*YRowFn)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      // Width is not a multiple of 16: use the "Any" wrapper.
      YRowFn = ARGBToYRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
               IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
      YRowFn = ARGBToYRow_SSSE3;
    } else {
      YRowFn = ARGBToYRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      YRowFn = ARGBToYRow_NEON;
    } else {
      YRowFn = ARGBToYRow_Any_NEON;
    }
  }
#endif

  for (int rows_left = height; rows_left > 0; --rows_left) {
    ARGBToUV444Row_C(src_argb, dst_u, dst_v, width);
    YRowFn(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// ARGB little endian (bgra in memory) to I422.
// Every source row produces one row of Y and one row each of U and V; the
// chroma rows are produced by the ARGBToUV422Row family.
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// or height is zero.
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  if (!src_argb || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  // A negative height asks for a vertical flip: begin at the last source row
  // and step backwards through the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  // Chroma row converter; the portable C version is the fallback.
  void (*UVRowFn)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                  int pix) = ARGBToUV422Row_C;
#if defined(HAS_ARGBTOUV422ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      UVRowFn = ARGBToUV422Row_Any_SSSE3;
    } else if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
               IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
      UVRowFn = ARGBToUV422Row_SSSE3;
    } else {
      UVRowFn = ARGBToUV422Row_Unaligned_SSSE3;
    }
  }
#endif

  // Luma row converter; the portable C version is the fallback.
  void (*YRowFn)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      YRowFn = ARGBToYRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
               IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
      YRowFn = ARGBToYRow_SSSE3;
    } else {
      YRowFn = ARGBToYRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      YRowFn = ARGBToYRow_NEON;
    } else {
      YRowFn = ARGBToYRow_Any_NEON;
    }
  }
#endif

  for (int rows_left = height; rows_left > 0; --rows_left) {
    UVRowFn(src_argb, dst_u, dst_v, width);
    YRowFn(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// ARGB little endian (bgra in memory) to I411.
// Every source row produces one row of Y plus one row each of U and V
// (chroma rows come from ARGBToUV411Row_C; only the Y row has SIMD variants).
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// or height is zero.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  if (!src_argb || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  // A negative height asks for a vertical flip: begin at the last source row
  // and step backwards through the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Choose the Y row converter; the portable C version is the fallback.
  void (*YRowFn)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      YRowFn = ARGBToYRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
               IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
      YRowFn = ARGBToYRow_SSSE3;
    } else {
      YRowFn = ARGBToYRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      YRowFn = ARGBToYRow_NEON;
    } else {
      YRowFn = ARGBToYRow_Any_NEON;
    }
  }
#endif

  for (int rows_left = height; rows_left > 0; --rows_left) {
    ARGBToUV411Row_C(src_argb, dst_u, dst_v, width);
    YRowFn(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// Convert ARGB to NV12.
// Writes a Y plane with one row per source row, and an interleaved UV plane
// with one row per pair of source rows holding (width + 1) / 2 U,V pairs.
// U and V are first computed into temporary rows and then interleaved with
// MergeUV (U first, then V).
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// height is zero, or width exceeds kMaxStride (the temporary U/V rows hold
// kMaxStride / 2 bytes each).
LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_uv, int dst_stride_uv,
               int width, int height) {
  if (!src_argb ||
      !dst_y || !dst_uv ||
      width <= 0 || height == 0 ||
      width > kMaxStride) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Select the ARGB -> U/V and ARGB -> Y row converters (C fallbacks first).
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
        ARGBToUVRow = ARGBToUVRow_SSSE3;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          ARGBToYRow = ARGBToYRow_SSSE3;
        }
      }
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_NEON;
    }
  }
#endif
  // Select the U/V interleaver. It operates on halfwidth chroma samples.
  int halfwidth = (width + 1) >> 1;
  void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                  int width) = MergeUV_C;
#if defined(HAS_MERGEUV_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
    MergeUV = MergeUV_Any_SSE2;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUV = MergeUV_Unaligned_SSE2;
      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
        MergeUV = MergeUV_SSE2;
      }
    }
  }
#endif
#if defined(HAS_MERGEUV_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
    MergeUV = MergeUV_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      MergeUV = MergeUV_Unaligned_AVX2;
      if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
        MergeUV = MergeUV_AVX2;
      }
    }
  }
#endif
#if defined(HAS_MERGEUV_NEON)
  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
    MergeUV = MergeUV_Any_NEON;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUV = MergeUV_Unaligned_NEON;
      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
        MergeUV = MergeUV_NEON;
      }
    }
  }
#endif

  // Temporary planar U and V rows, interleaved into dst_uv by MergeUV.
  SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
  SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);

  // Two source rows per iteration: one UV row and two Y rows.
  for (int y = 0; y < height - 1; y += 2) {
    ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
    MergeUV(row_u, row_v, dst_uv, halfwidth);
    ARGBToYRow(src_argb, dst_y, width);
    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
    src_argb += src_stride_argb * 2;
    dst_y += dst_stride_y * 2;
    dst_uv += dst_stride_uv;
  }
  // Odd height: the final row has no partner. Chroma is taken from that row
  // alone (stride 0 makes ARGBToUVRow read the same row twice) and exactly one
  // Y row is written. Emitting a second Y row here would write one row past
  // the end of the Y plane.
  if (height & 1) {
    ARGBToUVRow(src_argb, 0, row_u, row_v, width);
    MergeUV(row_u, row_v, dst_uv, halfwidth);
    ARGBToYRow(src_argb, dst_y, width);
  }
  return 0;
}
// Convert ARGB to NV21. Same as NV12 but U and V swapped.
// Writes a Y plane with one row per source row, and an interleaved chroma
// plane with one row per pair of source rows holding (width + 1) / 2 pairs.
// U and V are first computed into temporary rows and then interleaved with
// MergeUV (V first, then U).
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// height is zero, or width exceeds kMaxStride (the temporary U/V rows hold
// kMaxStride / 2 bytes each).
LIBYUV_API
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_uv, int dst_stride_uv,
               int width, int height) {
  if (!src_argb ||
      !dst_y || !dst_uv ||
      width <= 0 || height == 0 ||
      width > kMaxStride) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Select the ARGB -> U/V and ARGB -> Y row converters (C fallbacks first).
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
        ARGBToUVRow = ARGBToUVRow_SSSE3;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          ARGBToYRow = ARGBToYRow_SSSE3;
        }
      }
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_NEON;
    }
  }
#endif
  // Select the chroma interleaver. It operates on halfwidth chroma samples.
  int halfwidth = (width + 1) >> 1;
  void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                  int width) = MergeUV_C;
#if defined(HAS_MERGEUV_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
    MergeUV = MergeUV_Any_SSE2;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUV = MergeUV_Unaligned_SSE2;
      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
        MergeUV = MergeUV_SSE2;
      }
    }
  }
#endif
#if defined(HAS_MERGEUV_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
    MergeUV = MergeUV_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      MergeUV = MergeUV_Unaligned_AVX2;
      if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
        MergeUV = MergeUV_AVX2;
      }
    }
  }
#endif
#if defined(HAS_MERGEUV_NEON)
  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
    MergeUV = MergeUV_Any_NEON;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUV = MergeUV_Unaligned_NEON;
      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
        MergeUV = MergeUV_NEON;
      }
    }
  }
#endif

  // Temporary planar U and V rows, interleaved into dst_uv by MergeUV.
  SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
  SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);

  // Two source rows per iteration: one chroma row and two Y rows.
  // row_v is passed first so that V precedes U in the output.
  for (int y = 0; y < height - 1; y += 2) {
    ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
    MergeUV(row_v, row_u, dst_uv, halfwidth);
    ARGBToYRow(src_argb, dst_y, width);
    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
    src_argb += src_stride_argb * 2;
    dst_y += dst_stride_y * 2;
    dst_uv += dst_stride_uv;
  }
  // Odd height: the final row has no partner. Chroma is taken from that row
  // alone (stride 0 makes ARGBToUVRow read the same row twice) and exactly one
  // Y row is written. Emitting a second Y row here would write one row past
  // the end of the Y plane.
  if (height & 1) {
    ARGBToUVRow(src_argb, 0, row_u, row_v, width);
    MergeUV(row_v, row_u, dst_uv, halfwidth);
    ARGBToYRow(src_argb, dst_y, width);
  }
  return 0;
}
// Convert ARGB to YUY2.
// Each source row is converted into temporary Y, U and V rows, which are then
// packed into one destination row by the I422ToYUY2Row family.
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// height is zero, or width exceeds kMaxStride (the size of the temporary Y
// row).
LIBYUV_API
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
               uint8* dst_yuy2, int dst_stride_yuy2,
               int width, int height) {
  if (!src_argb || !dst_yuy2) {
    return -1;
  }
  if (width <= 0 || height == 0 || width > kMaxStride) {
    return -1;
  }
  // Negative height means invert the image; here the destination is walked
  // bottom-up while the source is read top-down.
  if (height < 0) {
    height = -height;
    dst_yuy2 += (height - 1) * dst_stride_yuy2;
    dst_stride_yuy2 = -dst_stride_yuy2;
  }

  // ARGB -> U/V and ARGB -> Y row converters; C versions are the fallback.
  void (*UVRowFn)(const uint8* src_argb0, int src_stride_argb,
                  uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
  void (*YRowFn)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      UVRowFn = ARGBToUVRow_Any_SSSE3;
      YRowFn = ARGBToYRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
      UVRowFn = ARGBToUVRow_SSSE3;
      YRowFn = ARGBToYRow_SSSE3;
    } else {
      UVRowFn = ARGBToUVRow_Unaligned_SSSE3;
      YRowFn = ARGBToYRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      YRowFn = ARGBToYRow_NEON;
    } else {
      YRowFn = ARGBToYRow_Any_NEON;
    }
  }
#endif

  // Planar Y/U/V -> packed YUY2 row converter.
  void (*PackRowFn)(const uint8* src_y, const uint8* src_u,
                    const uint8* src_v, uint8* dst_yuy2, int width) =
      I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
      IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) {
    if (IS_ALIGNED(width, 16)) {
      PackRowFn = I422ToYUY2Row_SSE2;
    } else {
      PackRowFn = I422ToYUY2Row_Any_SSE2;
    }
  }
#elif defined(HAS_I422TOYUY2ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
    if (IS_ALIGNED(width, 16)) {
      PackRowFn = I422ToYUY2Row_NEON;
    } else {
      PackRowFn = I422ToYUY2Row_Any_NEON;
    }
  }
#endif

  // Temporary planar rows for one line of output.
  SIMD_ALIGNED(uint8 luma_row[kMaxStride]);
  SIMD_ALIGNED(uint8 u_row[kMaxStride / 2]);
  SIMD_ALIGNED(uint8 v_row[kMaxStride / 2]);

  for (int rows_left = height; rows_left > 0; --rows_left) {
    // Stride 0: chroma is derived from this single row only.
    UVRowFn(src_argb, 0, u_row, v_row, width);
    YRowFn(src_argb, luma_row, width);
    PackRowFn(luma_row, u_row, v_row, dst_yuy2, width);
    src_argb += src_stride_argb;
    dst_yuy2 += dst_stride_yuy2;
  }
  return 0;
}
// Convert ARGB to UYVY.
// Each source row is converted into temporary Y, U and V rows, which are then
// packed into one destination row by the I422ToUYVYRow family.
// Returns 0 on success, or -1 if a pointer is NULL, width is not positive,
// height is zero, or width exceeds kMaxStride (the size of the temporary Y
// row).
LIBYUV_API
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
               uint8* dst_uyvy, int dst_stride_uyvy,
               int width, int height) {
  if (!src_argb || !dst_uyvy) {
    return -1;
  }
  if (width <= 0 || height == 0 || width > kMaxStride) {
    return -1;
  }
  // Negative height means invert the image; here the destination is walked
  // bottom-up while the source is read top-down.
  if (height < 0) {
    height = -height;
    dst_uyvy += (height - 1) * dst_stride_uyvy;
    dst_stride_uyvy = -dst_stride_uyvy;
  }

  // ARGB -> U/V and ARGB -> Y row converters; C versions are the fallback.
  void (*UVRowFn)(const uint8* src_argb0, int src_stride_argb,
                  uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
  void (*YRowFn)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    if (!IS_ALIGNED(width, 16)) {
      UVRowFn = ARGBToUVRow_Any_SSSE3;
      YRowFn = ARGBToYRow_Any_SSSE3;
    } else if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
      UVRowFn = ARGBToUVRow_SSSE3;
      YRowFn = ARGBToYRow_SSSE3;
    } else {
      UVRowFn = ARGBToUVRow_Unaligned_SSSE3;
      YRowFn = ARGBToYRow_Unaligned_SSSE3;
    }
  }
#elif defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    if (IS_ALIGNED(width, 8)) {
      YRowFn = ARGBToYRow_NEON;
    } else {
      YRowFn = ARGBToYRow_Any_NEON;
    }
  }
#endif

  // Planar Y/U/V -> packed UYVY row converter.
  void (*PackRowFn)(const uint8* src_y, const uint8* src_u,
                    const uint8* src_v, uint8* dst_uyvy, int width) =
      I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
      IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) {
    if (IS_ALIGNED(width, 16)) {
      PackRowFn = I422ToUYVYRow_SSE2;
    } else {
      PackRowFn = I422ToUYVYRow_Any_SSE2;
    }
  }
#elif defined(HAS_I422TOUYVYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
    if (IS_ALIGNED(width, 16)) {
      PackRowFn = I422ToUYVYRow_NEON;
    } else {
      PackRowFn = I422ToUYVYRow_Any_NEON;
    }
  }
#endif

  // Temporary planar rows for one line of output.
  SIMD_ALIGNED(uint8 luma_row[kMaxStride]);
  SIMD_ALIGNED(uint8 u_row[kMaxStride / 2]);
  SIMD_ALIGNED(uint8 v_row[kMaxStride / 2]);

  for (int rows_left = height; rows_left > 0; --rows_left) {
    // Stride 0: chroma is derived from this single row only.
    UVRowFn(src_argb, 0, u_row, v_row, width);
    YRowFn(src_argb, luma_row, width);
    PackRowFn(luma_row, u_row, v_row, dst_uyvy, width);
    src_argb += src_stride_argb;
    dst_uyvy += dst_stride_uyvy;
  }
  return 0;
}
// Convert ARGB to I400.
LIBYUV_API
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
@ -37,10 +553,8 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
@ -50,10 +564,8 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
@ -68,64 +580,6 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
return 0;
}
// ARGB little endian (bgra in memory) to I422
// same as I420 except UV plane is full height
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
}
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
}
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
for (int y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
src_argb += src_stride_argb;
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
return 0;
}
// Convert ARGB to RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,

View File

@ -268,7 +268,7 @@ int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
dst_argb += dst_stride_argb * 2;
}
if (height & 1) {
BayerRow0(src_bayer, -src_stride_bayer, dst_argb, width);
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
}
return 0;
}
@ -305,11 +305,9 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (width > 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
}
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
@ -319,10 +317,8 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
}
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}

View File

@ -617,7 +617,7 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
dst_stride_rgb565 = -dst_stride_rgb565;
}
void (*NV21ToRGB565Row)(const uint8* y_buf,
const uint8* vu_buf,
const uint8* src_vu,
uint8* rgb_buf,
int width) = NV21ToRGB565Row_C;
#if defined(HAS_NV21TORGB565ROW_SSSE3)

View File

@ -45,7 +45,7 @@ extern "C" {
#define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
#define HAS_MIRRORROW_UV_NEON
void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
@ -1049,21 +1049,21 @@ void RotateUV180(const uint8* src, int src_stride,
uint8* dst_b, int dst_stride_b,
int width, int height) {
void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
MirrorRowUV_C;
MirrorUVRow_C;
#if defined(HAS_MIRRORROW_UV_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MirrorRowUV = MirrorRowUV_NEON;
MirrorRowUV = MirrorUVRow_NEON;
}
#elif defined(HAS_MIRRORROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRowUV = MirrorRowUV_SSSE3;
MirrorRowUV = MirrorUVRow_SSSE3;
}
#elif defined(HAS_MIRRORROWUV_MIPS_DSPR2)
#elif defined(HAS_MirrorUVRow_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorRowUV_MIPS_DSPR2;
MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
}
#endif

View File

@ -121,12 +121,12 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
// NEON RGB24 is multiple of 8 pixels, unaligned source and destination.
// I400 To ARGB does multiple of 8 pixels with SIMD and remainder with C.
#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
void NAMEANY(const uint8* argb_buf, \
uint8* rgb_buf, \
void NAMEANY(const uint8* src, \
uint8* dst, \
int width) { \
int n = width & ~MASK; \
ARGBTORGB_SIMD(argb_buf, rgb_buf, n); \
ARGBTORGB_C(argb_buf + n * SBPP, rgb_buf + n * BPP, width & MASK); \
ARGBTORGB_SIMD(src, dst, n); \
ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \
}
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
@ -167,30 +167,37 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
#define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
ARGBTOY_SIMD(src_argb + (width - NUM) * BPP, dst_y + (width - NUM), NUM);\
ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \
dst_y + (width - NUM) * BPP, NUM); \
}
#ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 16)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 16)
#endif
#ifdef HAS_RGBATOYROW_SSSE3
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 16)
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16)
YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16)
YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8)
#endif
#ifdef HAS_ARGBTOYROW_NEON
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 8)
#endif
#ifdef HAS_YUY2TOYROW_SSE2
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 16)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 16)
#endif
#ifdef HAS_YUY2TOYROW_NEON
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 16)
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16)
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
#endif
#undef YANY
@ -201,17 +208,15 @@ YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16)
int n = width & ~15; \
ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \
ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \
dst_u + (n >> 1), \
dst_v + (n >> 1), \
width & 15); \
dst_u + (n >> 1), \
dst_v + (n >> 1), \
width & 15); \
}
#ifdef HAS_ARGBTOUVROW_SSSE3
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
#endif
#ifdef HAS_RGBATOYROW_SSSE3
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
@ -230,11 +235,15 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
int n = width & ~15; \
ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
ANYTOUV_C(src_uv + n * BPP, \
dst_u + (n >> 1), \
dst_v + (n >> 1), \
width & 15); \
dst_u + (n >> 1), \
dst_v + (n >> 1), \
width & 15); \
}
#ifdef HAS_ARGBTOUVROW_SSSE3
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
ARGBToUV422Row_C, 4)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
YUY2ToUV422Row_C, 2)

View File

@ -95,47 +95,47 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
}
}
void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) {
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
for (int x = 0; x < width; ++x) {
uint8 b = src_rgb[0] & 0x1f;
uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x07) << 3);
uint8 r = src_rgb[1] >> 3;
uint8 b = src_rgb565[0] & 0x1f;
uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
uint8 r = src_rgb565[1] >> 3;
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 2) | (g >> 4);
dst_argb[2] = (r << 3) | (r >> 2);
dst_argb[3] = 255u;
dst_argb += 4;
src_rgb += 2;
src_rgb565 += 2;
}
}
void ARGB1555ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) {
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, int width) {
for (int x = 0; x < width; ++x) {
uint8 b = src_rgb[0] & 0x1f;
uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x03) << 3);
uint8 r = (src_rgb[1] & 0x7c) >> 2;
uint8 a = src_rgb[1] >> 7;
uint8 b = src_argb1555[0] & 0x1f;
uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
uint8 r = (src_argb1555[1] & 0x7c) >> 2;
uint8 a = src_argb1555[1] >> 7;
dst_argb[0] = (b << 3) | (b >> 2);
dst_argb[1] = (g << 3) | (g >> 2);
dst_argb[2] = (r << 3) | (r >> 2);
dst_argb[3] = -a;
dst_argb += 4;
src_rgb += 2;
src_argb1555 += 2;
}
}
void ARGB4444ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) {
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, int width) {
for (int x = 0; x < width; ++x) {
uint8 b = src_rgb[0] & 0x0f;
uint8 g = src_rgb[0] >> 4;
uint8 r = src_rgb[1] & 0x0f;
uint8 a = src_rgb[1] >> 4;
uint8 b = src_argb4444[0] & 0x0f;
uint8 g = src_argb4444[0] >> 4;
uint8 r = src_argb4444[1] & 0x0f;
uint8 a = src_argb4444[1] >> 4;
dst_argb[0] = (b << 4) | b;
dst_argb[1] = (g << 4) | g;
dst_argb[2] = (r << 4) | r;
dst_argb[3] = (a << 4) | a;
dst_argb += 4;
src_rgb += 2;
src_argb4444 += 2;
}
}
@ -265,11 +265,11 @@ static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
}
#define MAKEROWY(NAME, R, G, B) \
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
for (int x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += 4; \
src_argb0 += BPP; \
dst_y += 1; \
} \
} \
@ -277,16 +277,16 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
uint8* dst_u, uint8* dst_v, int width) { \
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
for (int x = 0; x < width - 1; x += 2) { \
uint8 ab = (src_rgb0[B] + src_rgb0[B + 4] + \
src_rgb1[B] + src_rgb1[B + 4]) >> 2; \
uint8 ag = (src_rgb0[G] + src_rgb0[G + 4] + \
src_rgb1[G] + src_rgb1[G + 4]) >> 2; \
uint8 ar = (src_rgb0[R] + src_rgb0[R + 4] + \
src_rgb1[R] + src_rgb1[R + 4]) >> 2; \
uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \
src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \
uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \
src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \
uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \
src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb0 += 8; \
src_rgb1 += 8; \
src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
@ -299,10 +299,95 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
} \
}
MAKEROWY(ARGB, 2, 1, 0)
MAKEROWY(BGRA, 1, 2, 3)
MAKEROWY(ABGR, 0, 1, 2)
MAKEROWY(RGBA, 3, 2, 1)
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
for (int x = 0; x < width; ++x) {
uint8 b = src_rgb565[0] & 0x1f;
uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
uint8 r = src_rgb565[1] >> 3;
b = (b << 3) | (b >> 2);
g = (g << 2) | (g >> 4);
r = (r << 3) | (r >> 2);
dst_y[0] = RGBToY(r, g, b);
src_rgb565 += 2;
dst_y += 1;
}
}
void ARGBToUV444Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width) {
for (int x = 0; x < width; ++x) {
uint8 ab = src_argb[0];
uint8 ag = src_argb[1];
uint8 ar = src_argb[2];
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb += 4;
dst_u += 1;
dst_v += 1;
}
}
// Produces U and V for a row of ARGB pixels with 2:1 horizontal chroma
// subsampling: each output sample is computed from the average of two
// adjacent pixels. Bytes 0, 1 and 2 of each 4-byte pixel are read as B, G, R.
// For an odd width the final pixel yields one extra U/V sample on its own,
// so (width + 1) / 2 samples are written to each of dst_u and dst_v.
void ARGBToUV422Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  for (int x = 0; x < width - 1; x += 2) {
    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 8;
    dst_u += 1;
    dst_v += 1;
  }
  // The loop consumes pixels in pairs, so a leftover pixel exists whenever
  // width is odd. Testing (width & 3) == 1 would skip it for width % 4 == 3.
  if (width & 1) {
    uint8 ab = src_argb[0];
    uint8 ag = src_argb[1];
    uint8 ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
  }
}
void ARGBToUV411Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width) {
for (int x = 0; x < width - 3; x += 4) {
uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb += 16;
dst_u += 1;
dst_v += 1;
}
if ((width & 3) == 3) {
uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
} else if ((width & 3) == 2) {
uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
} else if ((width & 3) == 1) {
uint8 ab = src_argb[0];
uint8 ag = src_argb[1];
uint8 ar = src_argb[2];
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
}
}
// http://en.wikipedia.org/wiki/Grayscale.
// 0.11 * B + 0.59 * G + 0.30 * R
@ -470,104 +555,104 @@ static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v,
#if defined(__ARM_NEON__)
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
uint8 u = (u_buf[0] + u_buf[1] + 1) >> 1;
uint8 v = (v_buf[0] + v_buf[1] + 1) >> 1;
YuvPixel(y_buf[0], u, v, rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], u, v, rgb_buf + 4, 24, 16, 8, 0);
y_buf += 2;
u_buf += 2;
v_buf += 2;
uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[1], u, v, rgb_buf + 4, 24, 16, 8, 0);
src_y += 2;
src_u += 2;
src_v += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0);
}
}
#else
void I444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I444ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width; ++x) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0);
y_buf += 1;
u_buf += 1;
v_buf += 1;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf, 24, 16, 8, 0);
src_y += 1;
src_u += 1;
src_v += 1;
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
// Also used for 420
void I422ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
y_buf += 2;
u_buf += 1;
v_buf += 1;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0);
}
}
void I422ToRGB24Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToRGB24Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
YuvPixel2(src_y[0], src_u[0], src_v[0],
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
YuvPixel2(y_buf[1], u_buf[0], v_buf[0],
YuvPixel2(src_y[1], src_u[0], src_v[0],
rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
y_buf += 2;
u_buf += 1;
v_buf += 1;
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
YuvPixel2(src_y[0], src_u[0], src_v[0],
rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
}
}
void I422ToRAWRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToRAWRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
YuvPixel2(src_y[0], src_u[0], src_v[0],
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
YuvPixel2(y_buf[1], u_buf[0], v_buf[0],
YuvPixel2(src_y[1], src_u[0], src_v[0],
rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
y_buf += 2;
u_buf += 1;
v_buf += 1;
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 6; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0],
YuvPixel2(src_y[0], src_u[0], src_v[0],
rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
}
}
void I422ToARGB4444Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToARGB4444Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
int width) {
uint8 b0;
@ -577,8 +662,8 @@ void I422ToARGB4444Row_C(const uint8* y_buf,
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0);
YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1);
YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@ -587,13 +672,13 @@ void I422ToARGB4444Row_C(const uint8* y_buf,
r1 = r1 >> 4;
*reinterpret_cast<uint32*>(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
(b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
y_buf += 2;
u_buf += 1;
v_buf += 1;
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb4444 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0);
YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@ -602,9 +687,9 @@ void I422ToARGB4444Row_C(const uint8* y_buf,
}
}
void I422ToARGB1555Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToARGB1555Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
int width) {
uint8 b0;
@ -614,8 +699,8 @@ void I422ToARGB1555Row_C(const uint8* y_buf,
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0);
YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1);
YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@ -624,13 +709,13 @@ void I422ToARGB1555Row_C(const uint8* y_buf,
r1 = r1 >> 3;
*reinterpret_cast<uint32*>(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
(b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
y_buf += 2;
u_buf += 1;
v_buf += 1;
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb1555 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0);
YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@ -639,9 +724,9 @@ void I422ToARGB1555Row_C(const uint8* y_buf,
}
}
void I422ToRGB565Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToRGB565Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
int width) {
uint8 b0;
@ -651,8 +736,8 @@ void I422ToRGB565Row_C(const uint8* y_buf,
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0);
YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1);
YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -661,13 +746,13 @@ void I422ToRGB565Row_C(const uint8* y_buf,
r1 = r1 >> 3;
*reinterpret_cast<uint32*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
(b1 << 16) | (g1 << 21) | (r1 << 27);
y_buf += 2;
u_buf += 1;
v_buf += 1;
src_y += 2;
src_u += 1;
src_v += 1;
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0);
YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -675,66 +760,66 @@ void I422ToRGB565Row_C(const uint8* y_buf,
}
}
void I411ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I411ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 3; x += 4) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0);
YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0);
y_buf += 4;
u_buf += 1;
v_buf += 1;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0);
YuvPixel(src_y[2], src_u[0], src_v[0], rgb_buf + 8, 24, 16, 8, 0);
YuvPixel(src_y[3], src_u[0], src_v[0], rgb_buf + 12, 24, 16, 8, 0);
src_y += 4;
src_u += 1;
src_v += 1;
rgb_buf += 16; // Advance 4 pixels.
}
if (width & 2) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
y_buf += 2;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0);
src_y += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0);
}
}
void NV12ToARGBRow_C(const uint8* y_buf,
const uint8* uv_buf,
void NV12ToARGBRow_C(const uint8* src_y,
const uint8* usrc_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], uv_buf[0], uv_buf[1], rgb_buf + 4, 24, 16, 8, 0);
y_buf += 2;
uv_buf += 2;
YuvPixel(src_y[0], usrc_v[0], usrc_v[1], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[1], usrc_v[0], usrc_v[1], rgb_buf + 4, 24, 16, 8, 0);
src_y += 2;
usrc_v += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[0], usrc_v[0], usrc_v[1], rgb_buf + 0, 24, 16, 8, 0);
}
}
void NV21ToARGBRow_C(const uint8* y_buf,
const uint8* vu_buf,
void NV21ToARGBRow_C(const uint8* src_y,
const uint8* src_vu,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], vu_buf[1], vu_buf[0], rgb_buf + 4, 24, 16, 8, 0);
y_buf += 2;
vu_buf += 2;
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, 24, 16, 8, 0);
src_y += 2;
src_vu += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, 24, 16, 8, 0);
}
}
void NV12ToRGB565Row_C(const uint8* y_buf,
const uint8* uv_buf,
void NV12ToRGB565Row_C(const uint8* src_y,
const uint8* usrc_v,
uint8* dst_rgb565,
int width) {
uint8 b0;
@ -744,8 +829,8 @@ void NV12ToRGB565Row_C(const uint8* y_buf,
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0);
YuvPixel2(y_buf[1], uv_buf[0], uv_buf[1], &b1, &g1, &r1);
YuvPixel2(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
YuvPixel2(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -754,12 +839,12 @@ void NV12ToRGB565Row_C(const uint8* y_buf,
r1 = r1 >> 3;
*reinterpret_cast<uint32*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
(b1 << 16) | (g1 << 21) | (r1 << 27);
y_buf += 2;
uv_buf += 2;
src_y += 2;
usrc_v += 2;
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0);
YuvPixel2(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -767,8 +852,8 @@ void NV12ToRGB565Row_C(const uint8* y_buf,
}
}
void NV21ToRGB565Row_C(const uint8* y_buf,
const uint8* vu_buf,
void NV21ToRGB565Row_C(const uint8* src_y,
const uint8* vsrc_u,
uint8* dst_rgb565,
int width) {
uint8 b0;
@ -778,8 +863,8 @@ void NV21ToRGB565Row_C(const uint8* y_buf,
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0);
YuvPixel2(y_buf[1], vu_buf[1], vu_buf[0], &b1, &g1, &r1);
YuvPixel2(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
YuvPixel2(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -788,12 +873,12 @@ void NV21ToRGB565Row_C(const uint8* y_buf,
r1 = r1 >> 3;
*reinterpret_cast<uint32*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
(b1 << 16) | (g1 << 21) | (r1 << 27);
y_buf += 2;
vu_buf += 2;
src_y += 2;
vsrc_u += 2;
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0);
YuvPixel2(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@ -801,92 +886,92 @@ void NV21ToRGB565Row_C(const uint8* y_buf,
}
}
void YUY2ToARGBRow_C(const uint8* yuy2_buf,
void YUY2ToARGBRow_C(const uint8* src_yuy2,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(yuy2_buf[2], yuy2_buf[1], yuy2_buf[3], rgb_buf + 4, 24, 16, 8, 0);
yuy2_buf += 4;
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, 24, 16, 8, 0);
src_yuy2 += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, 24, 16, 8, 0);
}
}
void UYVYToARGBRow_C(const uint8* uyvy_buf,
void UYVYToARGBRow_C(const uint8* src_uyvy,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(uyvy_buf[3], uyvy_buf[0], uyvy_buf[2], rgb_buf + 4, 24, 16, 8, 0);
uyvy_buf += 4;
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, 24, 16, 8, 0);
src_uyvy += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, 24, 16, 8, 0);
}
}
void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToBGRARow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24);
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24);
y_buf += 2;
u_buf += 1;
v_buf += 1;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 8, 16, 24);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 0, 8, 16, 24);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24);
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf, 0, 8, 16, 24);
}
}
void I422ToABGRRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16);
y_buf += 2;
u_buf += 1;
v_buf += 1;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 0, 8, 16);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 0, 8, 16);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 0, 8, 16);
}
}
void I422ToRGBARow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToRGBARow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8);
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 24, 16, 8);
y_buf += 2;
u_buf += 1;
v_buf += 1;
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 24, 16, 8);
YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 0, 24, 16, 8);
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8);
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 24, 16, 8);
}
}
void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) {
void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
for (int x = 0; x < width; ++x) {
YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
y_buf += 1;
YuvPixel(src_y[0], 128, 128, rgb_buf, 24, 16, 8, 0);
src_y += 1;
rgb_buf += 4; // Advance 1 pixel.
}
}
@ -903,7 +988,7 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) {
}
}
void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
src_uv += (width - 1) << 1;
for (int x = 0; x < width - 1; x += 2) {
dst_u[x] = src_uv[0];
@ -1399,35 +1484,35 @@ void I422ToUYVYRow_C(const uint8* src_y,
// row_win.cc has asm version, but GCC uses 2 step wrapper. 5% slower.
// TODO(fbarchard): Handle width > kMaxStride here instead of calling code.
#if defined(__x86_64__) || defined(__i386__)
void I422ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
ARGBToRGB565Row_SSE2(row, rgb_buf, width);
}
#endif // defined(__x86_64__) || defined(__i386__)
#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
void I422ToARGB1555Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
}
void I422ToARGB4444Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
}
@ -1452,45 +1537,45 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y,
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
int width) {
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
YUY2ToUV422Row_SSE2(src_yuy2, rowu, rowv, width);
YUY2ToYRow_SSE2(src_yuy2, rowy, width);
I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width);
SIMD_ALIGNED(uint8 row_y[kMaxStride]);
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);
YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
YUY2ToYRow_SSE2(src_yuy2, row_y, width);
I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
int width) {
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, rowu, rowv, width);
YUY2ToYRow_Unaligned_SSE2(src_yuy2, rowy, width);
I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width);
SIMD_ALIGNED(uint8 row_y[kMaxStride]);
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);
YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
int width) {
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
UYVYToUV422Row_SSE2(src_uyvy, rowu, rowv, width);
UYVYToYRow_SSE2(src_uyvy, rowy, width);
I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width);
SIMD_ALIGNED(uint8 row_y[kMaxStride]);
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);
UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
UYVYToYRow_SSE2(src_uyvy, row_y, width);
I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
int width) {
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
UYVYToUV422Row_Unaligned_SSE2(src_uyvy, rowu, rowv, width);
UYVYToYRow_Unaligned_SSE2(src_uyvy, rowy, width);
I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width);
SIMD_ALIGNED(uint8 row_y[kMaxStride]);
SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]);
SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]);
UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)

View File

@ -225,8 +225,8 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_MIRRORROW_MIPS_DSPR2
#ifdef HAS_MIRRORROWUV_MIPS_DSPR2
void MirrorRowUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
#ifdef HAS_MirrorUVRow_MIPS_DSPR2
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
int x = 0;
int y = 0;
@ -315,7 +315,7 @@ void MirrorRowUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"t5", "t7", "t8", "t9"
);
}
#endif // HAS_MIRRORROWUV_MIPS_DSPR2
#endif // HAS_MirrorUVRow_MIPS_DSPR2

View File

@ -629,9 +629,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
#ifdef HAS_NV12TORGB565ROW_NEON
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
asm volatile (
"vld1.u8 {d24}, [%4] \n"
"vld1.u8 {d25}, [%5] \n"
@ -660,9 +660,9 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
#ifdef HAS_NV21TORGB565ROW_NEON
void NV21ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
asm volatile (
"vld1.u8 {d24}, [%4] \n"
"vld1.u8 {d25}, [%5] \n"
@ -955,8 +955,8 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_MIRRORROW_NEON
#ifdef HAS_MIRRORROWUV_NEON
void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
#ifdef HAS_MirrorUVRow_NEON
void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
asm volatile (
// compute where to start writing destination
"add %1, %3 \n" // dst_a + width
@ -1013,7 +1013,7 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
: "memory", "cc", "r12", "q0"
);
}
#endif // HAS_MIRRORROWUV_NEON
#endif // HAS_MirrorUVRow_NEON
#ifdef HAS_BGRATOARGBROW_NEON
void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix) {
@ -1112,6 +1112,41 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
}
#endif // HAS_RAWTOARGBROW_NEON
#ifdef HAS_RGB565TOARGBROW_NEON
// Asm fragment: expands 8 RGB565 pixels held in q0 (one 16 bit value per
// pixel) into 8 bit channels, leaving B in d0, G in d1 and R in d2.
// Each channel is shifted to the top of the byte and its upper bits are
// replicated into the vacated low bits.
// Requires d7 to hold 7 in every byte (vbic uses it to clear the 3 green bits
// that remain below R). Overwrites d4, d5 and d6 as scratch.
#define RGB565TOARGB \
"vmovn.u16 d4, q0 \n" /* B xxxBBBBB (low byte of each pixel) */ \
"vshrn.u16 d5, q0, #5 \n" /* G xxGGGGGG */ \
"vshrn.u16 d6, q0, #8 \n" /* R RRRRRxxx (high byte of each pixel) */ \
"vshl.u8 d0, d4, #3 \n" /* B BBBBB000 upper 5 */ \
"vshl.u8 d1, d5, #2 \n" /* G GGGGGG00 upper 6 */ \
"vbic.u8 d2, d6, d7 \n" /* R RRRRR000 upper 5 (d7 clears low 3 bits) */ \
"vshr.u8 d4, d0, #5 \n" /* B 00000BBB lower 3 */ \
"vshr.u8 d5, d1, #6 \n" /* G 000000GG lower 2 */ \
"vshr.u8 d6, d2, #5 \n" /* R 00000RRR lower 3 */ \
"vorr.u8 q0, q0, q2 \n" /* B,G: d0 |= d4, d1 |= d5 */ \
"vorr.u8 d2, d2, d6 \n" /* R */
// Converts a row of RGB565 pixels to 4 byte pixels stored as B, G, R, A with
// A set to 255.
// src_rgb565: source row, 2 bytes per pixel; 16 bytes are read per iteration.
// dst_argb:   destination row; 32 bytes are written per iteration.
// pix:        number of pixels. The loop subtracts 8 and repeats while the
//             result is > 0, so it always runs at least once and handles
//             whole groups of 8 pixels only.
void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
asm volatile (
"vmov.u8 d3, #255 \n" // Alpha
"vmov.u8 d7, #7 \n" // mask of the low 3 bits, used by RGB565TOARGB
".p2align 2 \n"
"1: \n"
"vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565.
"subs %2, %2, #8 \n" // 8 processed per loop.
RGB565TOARGB
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
"bgt 1b \n" // flags still come from the subs above
: "+r"(src_rgb565), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
#endif // HAS_RGB565TOARGBROW_NEON
#ifdef HAS_ARGBTORGBAROW_NEON
void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) {
asm volatile (
@ -1436,9 +1471,9 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
ARGBTOARGB4444
"vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444.
"bgt 1b \n"
: "+r"(src_argb), // %0
: "+r"(src_argb), // %0
"+r"(dst_argb4444), // %1
"+r"(pix) // %2
"+r"(pix) // %2
:
: "memory", "cc", "q0", "q8", "q9", "q10", "q11"
);
@ -1447,6 +1482,117 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
#ifdef HAS_ARGBTOYROW_NEON
// Computes one row of 8 bit Y from 4 byte ARGB pixels.
// Per pixel: Y = sat8(((13 * B + 65 * G + 33 * R) rounded >> 7) + 16), where
// B, G, R are the first, second and third byte of the pixel; the fourth byte
// is loaded but not used.
// src_argb: source row; 32 bytes are read per iteration.
// dst_y:    destination row; 8 bytes are written per iteration.
// pix:      number of pixels. The loop subtracts 8 and repeats while the
//           result is > 0, so it handles whole groups of 8 pixels only.
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient (13 / 128)
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient (65 / 128)
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient (33 / 128)
"vmov.u8 d27, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q2, d0, d24 \n" // B
"vmlal.u8 q2, d1, d25 \n" // G
"vmlal.u8 q2, d2, d26 \n" // R
"vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vqadd.u8 d0, d27 \n" // saturating add of the 16 offset
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q12", "q13"
);
}
#endif // HAS_ARGBTOYROW_NEON
#ifdef HAS_RGB565TOYROW_NEON
// Computes one row of 8 bit Y from RGB565 pixels.
// Each group of 8 pixels is first expanded to 8 bit B, G, R in d0, d1, d2 by
// RGB565TOARGB, then weighted as
//   Y = sat8(((13 * B + 65 * G + 33 * R) rounded >> 7) + 16).
// src_rgb565: source row, 2 bytes per pixel; 16 bytes are read per iteration.
// dst_y:      destination row; 8 bytes are written per iteration.
// pix:        number of pixels. The loop subtracts 8 and repeats while the
//             result is > 0, so it handles whole groups of 8 pixels only.
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d7, #7 \n" // mask of the low 3 bits, used by RGB565TOARGB
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient (13 / 128)
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient (65 / 128)
"vmov.u8 d26, #33 \n" // R * 0.2578 coefficient (33 / 128)
"vmov.u8 d27, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565.
"subs %2, %2, #8 \n" // 8 processed per loop.
RGB565TOARGB
"vmull.u8 q2, d0, d24 \n" // B
"vmlal.u8 q2, d1, d25 \n" // G
"vmlal.u8 q2, d2, d26 \n" // R
"vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vqadd.u8 d0, d27 \n" // saturating add of the 16 offset
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgb565), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q12", "q13"
);
}
#endif // HAS_RGB565TOYROW_NEON
#ifdef HAS_BGRATOYROW_NEON
// Computes one row of 8 bit Y from 4 byte BGRA pixels.
// The second, third and fourth byte of each pixel are weighted as R, G and B:
//   Y = sat8(((33 * R + 65 * G + 13 * B) rounded >> 7) + 16).
// The first byte of each pixel (d0) is loaded but not used.
// src_bgra: source row; 32 bytes are read per iteration.
// dst_y:    destination row; 8 bytes are written per iteration.
// pix:      number of pixels. The loop subtracts 8 and repeats while the
//           result is > 0, so it handles whole groups of 8 pixels only.
void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient (33 / 128)
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient (65 / 128)
"vmov.u8 d6, #13 \n" // B * 0.1016 coefficient (13 / 128)
"vmov.u8 d7, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d1, d4 \n" // R
"vmlal.u8 q8, d2, d5 \n" // G
"vmlal.u8 q8, d3, d6 \n" // B
"vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vqadd.u8 d0, d7 \n" // saturating add of the 16 offset
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_bgra), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
#endif // HAS_BGRATOYROW_NEON
#ifdef HAS_ABGRTOYROW_NEON
// Computes one row of 8 bit Y from 4 byte ABGR pixels.
// The first, second and third byte of each pixel are weighted as R, G and B:
//   Y = sat8(((33 * R + 65 * G + 13 * B) rounded >> 7) + 16).
// The fourth byte of each pixel (d3) is loaded but not used.
// src_abgr: source row; 32 bytes are read per iteration.
// dst_y:    destination row; 8 bytes are written per iteration.
// pix:      number of pixels. The loop subtracts 8 and repeats while the
//           result is > 0, so it handles whole groups of 8 pixels only.
void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient (33 / 128)
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient (65 / 128)
"vmov.u8 d6, #13 \n" // B * 0.1016 coefficient (13 / 128)
"vmov.u8 d7, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d0, d4 \n" // R
"vmlal.u8 q8, d1, d5 \n" // G
"vmlal.u8 q8, d2, d6 \n" // B
"vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vqadd.u8 d0, d7 \n" // saturating add of the 16 offset
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
#endif // HAS_ABGRTOYROW_NEON
#ifdef HAS_RGBATOYROW_NEON
void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
@ -1454,7 +1600,34 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
"vmov.u8 d7, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d1, d4 \n" // B
"vmlal.u8 q8, d2, d5 \n" // G
"vmlal.u8 q8, d3, d6 \n" // R
"vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
#endif // HAS_RGBATOYROW_NEON
#ifdef HAS_RGB24TOYROW_NEON
void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
"vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
"vmov.u8 d7, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d0, d4 \n" // B
"vmlal.u8 q8, d1, d5 \n" // G
@ -1463,14 +1636,41 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
"vqadd.u8 d0, d7 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_argb), // %0
: "+r"(src_rgb24), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
#endif // HAS_ARGBTOYROW_NEON
#endif // HAS_RGB24TOYROW_NEON
#ifdef HAS_RAWTOYROW_NEON
// Computes one row of 8 bit Y from 3 byte RAW pixels.
// The first, second and third byte of each pixel are weighted with the R, G
// and B coefficients respectively:
//   Y = sat8(((33 * R + 65 * G + 13 * B) rounded >> 7) + 16).
// src_raw: source row; 24 bytes are read per iteration.
// dst_y:   destination row; 8 bytes are written per iteration.
// pix:     number of pixels. The loop subtracts 8 and repeats while the
//          result is > 0, so it handles whole groups of 8 pixels only.
void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
asm volatile (
"vmov.u8 d4, #33 \n" // R * 0.2578 coefficient (33 / 128)
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient (65 / 128)
"vmov.u8 d6, #13 \n" // B * 0.1016 coefficient (13 / 128)
"vmov.u8 d7, #16 \n" // Add 16 constant
".p2align 2 \n"
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q8, d0, d4 \n" // R (first byte times the R coefficient)
"vmlal.u8 q8, d1, d5 \n" // G
"vmlal.u8 q8, d2, d6 \n" // B (third byte times the B coefficient)
"vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vqadd.u8 d0, d7 \n" // saturating add of the 16 offset
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_y), // %1
"+r"(pix) // %2
:
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
);
}
#endif // HAS_RAWTOYROW_NEON
#endif // __ARM_NEON__

View File

@ -925,6 +925,120 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
);
}
// Converts one row of ARGB pixels to U and V, subsampled 2:1 horizontally.
// Only a single source row is read (there is no stride parameter).
// Each iteration reads 64 bytes (16 pixels) with aligned loads, averages
// horizontally adjacent pixel pairs, and writes 8 bytes to dst_u and 8 bytes
// to dst_v.
// src_argb0: source row; read with movdqa, so it must be 16 byte aligned.
// dst_u:     destination for U.
// dst_v:     destination for V; addressed as dst_u + (dst_v - dst_u).
// width:     number of source pixels. The loop subtracts 16 and repeats while
//            the result is > 0.
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
// Load the conversion constants (defined elsewhere in the file) into
// xmm4 (U), xmm3 (V) and xmm5 (bias added after packing).
// NOTE(review): this statement writes xmm3-xmm5 without listing them as
// clobbers, and the loop below relies on them surviving between the two asm
// statements - confirm this matches the convention used in the rest of the
// file.
asm volatile (
"movdqa %0,%%xmm4 \n"
"movdqa %1,%%xmm3 \n"
"movdqa %2,%%xmm5 \n"
:
: "m"(kARGBToU), // %0
"m"(kARGBToV), // %1
"m"(kAddUV128) // %2
);
asm volatile (
"sub %1,%2 \n" // %2 = dst_v - dst_u
".p2align 4 \n"
"1: \n"
// Load 16 pixels.
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"movdqa 0x20(%0),%%xmm2 \n"
"movdqa 0x30(%0),%%xmm6 \n"
"lea 0x40(%0),%0 \n"
// shufps 0x88 gathers the even pixels, 0xdd the odd pixels; pavgb averages
// each even/odd pair, leaving 4 averaged pixels in xmm0 and 4 in xmm2.
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
"shufps $0xdd,%%xmm1,%%xmm7 \n"
"pavgb %%xmm7,%%xmm0 \n"
"movdqa %%xmm2,%%xmm7 \n"
"shufps $0x88,%%xmm6,%%xmm2 \n"
"shufps $0xdd,%%xmm6,%%xmm7 \n"
"pavgb %%xmm7,%%xmm2 \n"
// Apply the U (xmm4) and V (xmm3) coefficients to copies of the averaged
// pixels and sum the products down to one 16 bit value per pixel.
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm2,%%xmm6 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm6 \n"
"phaddw %%xmm2,%%xmm0 \n"
"phaddw %%xmm6,%%xmm1 \n"
"psraw $0x8,%%xmm0 \n"
"psraw $0x8,%%xmm1 \n"
// Pack to bytes: low 8 bytes are U, high 8 bytes are V; then add the bias.
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
// The moves and lea below do not modify flags, so jg tests this sub.
"sub $0x10,%3 \n"
"movlps %%xmm0,(%1) \n" // store 8 U
"movhps %%xmm0,(%1,%2,1) \n" // store 8 V at dst_u + (dst_v - dst_u)
"lea 0x8(%1),%1 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
#endif
);
}
// Converts one row of ARGB pixels to U and V, subsampled 2:1 horizontally.
// Same instruction sequence as ARGBToUV422Row_SSSE3 except that the source is
// read with movdqu, so src_argb0 does not need to be 16 byte aligned.
// Each iteration reads 64 bytes (16 pixels), averages horizontally adjacent
// pixel pairs, and writes 8 bytes to dst_u and 8 bytes to dst_v.
// src_argb0: source row.
// dst_u:     destination for U.
// dst_v:     destination for V; addressed as dst_u + (dst_v - dst_u).
// width:     number of source pixels. The loop subtracts 16 and repeats while
//            the result is > 0.
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
// Load the conversion constants (defined elsewhere in the file) into
// xmm4 (U), xmm3 (V) and xmm5 (bias added after packing).
// NOTE(review): this statement writes xmm3-xmm5 without listing them as
// clobbers, and the loop below relies on them surviving between the two asm
// statements - confirm this matches the convention used in the rest of the
// file.
asm volatile (
"movdqa %0,%%xmm4 \n"
"movdqa %1,%%xmm3 \n"
"movdqa %2,%%xmm5 \n"
:
: "m"(kARGBToU), // %0
"m"(kARGBToV), // %1
"m"(kAddUV128) // %2
);
asm volatile (
"sub %1,%2 \n" // %2 = dst_v - dst_u
".p2align 4 \n"
"1: \n"
// Load 16 pixels (unaligned loads).
"movdqu (%0),%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n"
"movdqu 0x20(%0),%%xmm2 \n"
"movdqu 0x30(%0),%%xmm6 \n"
"lea 0x40(%0),%0 \n"
// shufps 0x88 gathers the even pixels, 0xdd the odd pixels; pavgb averages
// each even/odd pair, leaving 4 averaged pixels in xmm0 and 4 in xmm2.
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
"shufps $0xdd,%%xmm1,%%xmm7 \n"
"pavgb %%xmm7,%%xmm0 \n"
"movdqa %%xmm2,%%xmm7 \n"
"shufps $0x88,%%xmm6,%%xmm2 \n"
"shufps $0xdd,%%xmm6,%%xmm7 \n"
"pavgb %%xmm7,%%xmm2 \n"
// Apply the U (xmm4) and V (xmm3) coefficients to copies of the averaged
// pixels and sum the products down to one 16 bit value per pixel.
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm2,%%xmm6 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm6 \n"
"phaddw %%xmm2,%%xmm0 \n"
"phaddw %%xmm6,%%xmm1 \n"
"psraw $0x8,%%xmm0 \n"
"psraw $0x8,%%xmm1 \n"
// Pack to bytes: low 8 bytes are U, high 8 bytes are V; then add the bias.
"packsswb %%xmm1,%%xmm0 \n"
"paddb %%xmm5,%%xmm0 \n"
// The moves and lea below do not modify flags, so jg tests this sub.
"sub $0x10,%3 \n"
"movlps %%xmm0,(%1) \n" // store 8 U
"movhps %%xmm0,(%1,%2,1) \n" // store 8 V at dst_u + (dst_v - dst_u)
"lea 0x8(%1),%1 \n"
"jg 1b \n"
: "+r"(src_argb0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
#endif
);
}
void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
asm volatile (
"movdqa %4,%%xmm5 \n"
@ -1652,7 +1766,7 @@ struct {
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1688,7 +1802,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb24_buf,
uint8* dst_rgb24,
int width) {
// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
#ifdef __APPLE__
@ -1743,7 +1857,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* raw_buf,
uint8* dst_raw,
int width) {
// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
#ifdef __APPLE__
@ -1798,7 +1912,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1834,7 +1948,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1869,7 +1983,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -1901,8 +2015,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
}
void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
const uint8* src_vu,
uint8* dst_argb,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -1936,7 +2050,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1972,7 +2086,7 @@ void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2008,7 +2122,7 @@ void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2043,7 +2157,7 @@ void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2075,8 +2189,8 @@ void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
}
void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
const uint8* src_vu,
uint8* dst_argb,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2110,7 +2224,7 @@ void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
uint8* dst_bgra,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2147,7 +2261,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
uint8* dst_abgr,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2183,7 +2297,7 @@ void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgba_buf,
uint8* dst_rgba,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2220,7 +2334,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
uint8* dst_bgra,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2257,7 +2371,7 @@ void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
uint8* dst_abgr,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2293,7 +2407,7 @@ void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgba_buf,
uint8* dst_rgba,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -2446,7 +2560,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
CONST uvec8 kShuffleMirrorUV = {
14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
};
void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
int width) {
intptr_t temp_width = static_cast<intptr_t>(width);
asm volatile (

View File

@ -1101,6 +1101,124 @@ __asm {
}
}
// Converts one row of ARGB pixels to U and V, subsampled 2:1 horizontally.
// Only a single source row is read (there is no stride parameter).
// Each iteration reads 64 bytes (16 pixels) with aligned loads and writes
// 8 bytes to dst_u and 8 bytes to dst_v. The loop subtracts 16 from width and
// repeats while the result is > 0.
// Naked function: arguments are read directly from the stack, and edi is
// saved and restored by hand.
__declspec(naked) __declspec(align(16))
void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_argb0
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128
sub edi, edx // stride from u to v
align 16
convertloop:
/* step 1 - subsample 16x1 argb pixels to 8x1 by averaging adjacent pairs */
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
movdqa xmm2, [eax + 32]
movdqa xmm3, [eax + 48]
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88 // even pixels
shufps xmm4, xmm1, 0xdd // odd pixels
pavgb xmm0, xmm4
movdqa xmm4, xmm2
shufps xmm2, xmm3, 0x88 // even pixels
shufps xmm4, xmm3, 0xdd // odd pixels
pavgb xmm2, xmm4
// step 2 - convert to U and V
// from here down is very similar to Y code except
// instead of 16 different pixels, it's 8 pixels of U and 8 of V
movdqa xmm1, xmm0
movdqa xmm3, xmm2
pmaddubsw xmm0, xmm7 // U
pmaddubsw xmm2, xmm7
pmaddubsw xmm1, xmm6 // V
pmaddubsw xmm3, xmm6
phaddw xmm0, xmm2
phaddw xmm1, xmm3
psraw xmm0, 8
psraw xmm1, 8
packsswb xmm0, xmm1 // low 8 bytes U, high 8 bytes V
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
// the stores and lea below do not modify flags, so jg tests this sub
sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
jg convertloop
pop edi
ret
}
}
// Converts one row of ARGB pixels to U and V, subsampled 2:1 horizontally.
// Same instruction sequence as ARGBToUV422Row_SSSE3 except that the source is
// read with movdqu, so src_argb0 does not need to be 16 byte aligned.
// Each iteration reads 64 bytes (16 pixels) and writes 8 bytes to dst_u and
// 8 bytes to dst_v. The loop subtracts 16 from width and repeats while the
// result is > 0.
// Naked function: arguments are read directly from the stack, and edi is
// saved and restored by hand.
__declspec(naked) __declspec(align(16))
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_argb0
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // width
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128
sub edi, edx // stride from u to v
align 16
convertloop:
/* step 1 - subsample 16x1 argb pixels to 8x1 by averaging adjacent pairs */
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
movdqu xmm2, [eax + 32]
movdqu xmm3, [eax + 48]
lea eax, [eax + 64]
movdqa xmm4, xmm0
shufps xmm0, xmm1, 0x88 // even pixels
shufps xmm4, xmm1, 0xdd // odd pixels
pavgb xmm0, xmm4
movdqa xmm4, xmm2
shufps xmm2, xmm3, 0x88 // even pixels
shufps xmm4, xmm3, 0xdd // odd pixels
pavgb xmm2, xmm4
// step 2 - convert to U and V
// from here down is very similar to Y code except
// instead of 16 different pixels, it's 8 pixels of U and 8 of V
movdqa xmm1, xmm0
movdqa xmm3, xmm2
pmaddubsw xmm0, xmm7 // U
pmaddubsw xmm2, xmm7
pmaddubsw xmm1, xmm6 // V
pmaddubsw xmm3, xmm6
phaddw xmm0, xmm2
phaddw xmm1, xmm3
psraw xmm0, 8
psraw xmm1, 8
packsswb xmm0, xmm1 // low 8 bytes U, high 8 bytes V
paddb xmm0, xmm5 // -> unsigned
// step 3 - store 8 U and 8 V values
// the stores and lea below do not modify flags, so jg tests this sub
sub ecx, 16
movlps qword ptr [edx], xmm0 // U
movhps qword ptr [edx + edi], xmm0 // V
lea edx, [edx + 8]
jg convertloop
pop edi
ret
}
}
__declspec(naked) __declspec(align(16))
void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
@ -1656,7 +1774,7 @@ __declspec(naked) __declspec(align(16))
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -1699,7 +1817,7 @@ __declspec(naked) __declspec(align(16))
void I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb24_buf,
uint8* dst_rgb24,
int width) {
__asm {
push esi
@ -1746,7 +1864,7 @@ __declspec(naked) __declspec(align(16))
void I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* raw_buf,
uint8* dst_raw,
int width) {
__asm {
push esi
@ -1866,7 +1984,7 @@ __declspec(naked) __declspec(align(16))
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -1910,7 +2028,7 @@ __declspec(naked) __declspec(align(16))
void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -1952,7 +2070,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
__declspec(naked) __declspec(align(16))
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -1990,7 +2108,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
__declspec(naked) __declspec(align(16))
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -2029,7 +2147,7 @@ __declspec(naked) __declspec(align(16))
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -2072,7 +2190,7 @@ __declspec(naked) __declspec(align(16))
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -2116,7 +2234,7 @@ __declspec(naked) __declspec(align(16))
void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -2158,7 +2276,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
__declspec(naked) __declspec(align(16))
void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -2196,7 +2314,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
__declspec(naked) __declspec(align(16))
void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
@ -2233,7 +2351,7 @@ __declspec(naked) __declspec(align(16))
void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
uint8* dst_bgra,
int width) {
__asm {
push esi
@ -2274,7 +2392,7 @@ __declspec(naked) __declspec(align(16))
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
uint8* dst_bgra,
int width) {
__asm {
push esi
@ -2315,7 +2433,7 @@ __declspec(naked) __declspec(align(16))
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
uint8* dst_abgr,
int width) {
__asm {
push esi
@ -2356,7 +2474,7 @@ __declspec(naked) __declspec(align(16))
void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
uint8* dst_abgr,
int width) {
__asm {
push esi
@ -2397,7 +2515,7 @@ __declspec(naked) __declspec(align(16))
void I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgba_buf,
uint8* dst_rgba,
int width) {
__asm {
push esi
@ -2438,7 +2556,7 @@ __declspec(naked) __declspec(align(16))
void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgba_buf,
uint8* dst_rgba,
int width) {
__asm {
push esi
@ -2591,7 +2709,7 @@ static const uvec8 kShuffleMirrorUV = {
};
__declspec(naked) __declspec(align(16))
void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
int width) {
__asm {
push edi

View File

@ -220,7 +220,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_ - 4, _Any, +, 0) \
benchmark_width_ - 4, _Any, +, 0) \
TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Unaligned, +, 1) \
@ -614,10 +614,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, I420, 2, 2)
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2)
// TESTATOPLANAR(ARGB, 4, I411, 4, 1)
TESTATOPLANAR(ARGB, 4, I411, 4, 1)
TESTATOPLANAR(ARGB, 4, I422, 2, 1)
// TESTATOPLANAR(ARGB, 4, I444, 1, 1)
// TODO(fbarchard): Implement and test 411 and 444
TESTATOPLANAR(ARGB, 4, I444, 1, 1)
TESTATOPLANAR(V210, 16 / 6, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, I420, 2, 2)
TESTATOPLANAR(UYVY, 2, I420, 2, 2)
@ -629,30 +628,103 @@ TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2)
TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2)
TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2)
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, \
N, NEG, OFF) \
// Defines one test, named FMT_A##To##FMT_PLANAR##N, for a conversion from a
// packed format to a biplanar format (a Y plane plus one interleaved UV
// plane).
//   FMT_A, BPP_A          source format name and its bytes per pixel.
//   FMT_PLANAR            destination format name.
//   SUBSAMP_X, SUBSAMP_Y  chroma subsampling factors used to size the UV plane.
//   W1280                 test width.
//   N                     suffix appended to the test name.
//   NEG                   sign token placed before the height argument.
//   OFF                   byte offset added to the source pointer.
// The source is filled with random bytes. The conversion is run once after
// MaskCpuFlags(0) and benchmark_iterations_ times after MaskCpuFlags(-1)
// (presumably with and without CPU specific code paths - confirm against
// MaskCpuFlags), and the two results must not differ by more than 2 in any
// byte. max_diff is not reset between the Y and UV comparisons, so the second
// EXPECT_LE covers both planes.
#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF) \
TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
align_buffer_16(src_argb, kStride * kHeight + OFF); \
align_buffer_16(dst_y_c, kWidth * kHeight); \
align_buffer_16(dst_uv_c, kWidth / SUBSAMP_X * 2 * kHeight / SUBSAMP_Y); \
align_buffer_16(dst_y_opt, kWidth * kHeight); \
align_buffer_16(dst_uv_opt, kWidth / SUBSAMP_X * 2 * kHeight / SUBSAMP_Y); \
srandom(time(NULL)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kStride; ++j) \
src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
MaskCpuFlags(0); \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_c, kWidth, \
dst_uv_c, kWidth / SUBSAMP_X * 2, \
kWidth, NEG kHeight); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_opt, kWidth, \
dst_uv_opt, kWidth / SUBSAMP_X * 2, \
kWidth, NEG kHeight); \
} \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
int abs_diff = \
abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
static_cast<int>(dst_y_opt[i * kWidth + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_LE(max_diff, 2); \
for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \
for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \
int abs_diff = \
abs(static_cast<int>(dst_uv_c[i * kWidth / SUBSAMP_X * 2 + j]) - \
static_cast<int>(dst_uv_opt[i * kWidth / SUBSAMP_X * 2 + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_LE(max_diff, 2); \
free_aligned_buffer_16(dst_y_c) \
free_aligned_buffer_16(dst_uv_c) \
free_aligned_buffer_16(dst_y_opt) \
free_aligned_buffer_16(dst_uv_opt) \
free_aligned_buffer_16(src_argb) \
}
// Instantiates TESTATOBIPLANARI four times for one format pair:
//   _Any        width of benchmark_width_ - 4, no source offset.
//   _Unaligned  full width, source pointer offset by 1 byte.
//   _Invert     full width, height passed negated.
//   _Opt        full width, no offset, positive height.
#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_ - 4, _Any, +, 0) \
TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Unaligned, +, 1) \
TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Invert, -, 0) \
TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Opt, +, 0)
// ARGB (4 bytes per pixel) to NV12 and NV21, both with 2x2 chroma subsampling.
TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
FMT_B, BPP_B, STRIDE_B, \
W1280, DIFF, N, NEG, OFF) \
TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight + OFF); \
align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \
align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_16(src_argb, kStrideA * kHeight + OFF); \
align_buffer_16(dst_argb_c, kStrideB * kHeight); \
align_buffer_16(dst_argb_opt, kStrideB * kHeight); \
srandom(time(NULL)); \
for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \
for (int i = 0; i < kStrideA * kHeight; ++i) { \
src_argb[i + OFF] = (random() & 0xff); \
} \
MaskCpuFlags(0); \
FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \
dst_argb_c, kWidth * BPP_B, \
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
dst_argb_c, kStrideB, \
kWidth, NEG kHeight); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \
dst_argb_opt, kWidth * BPP_B, \
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
dst_argb_opt, kStrideB, \
kWidth, NEG kHeight); \
} \
int max_diff = 0; \
for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \
for (int i = 0; i < kStrideB * kHeight; ++i) { \
int abs_diff = \
abs(static_cast<int>(dst_argb_c[i]) - \
static_cast<int>(dst_argb_opt[i])); \
@ -665,65 +737,26 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
free_aligned_buffer_16(dst_argb_c) \
free_aligned_buffer_16(dst_argb_opt) \
}
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, DIFF) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \
_Any, +, 0) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \
_Unaligned, +, 1) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \
_Invert, -, 0) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \
_Opt, +, 0)
TESTATOB(ARGB, 4, 4, ARGB, 4, 0)
TESTATOB(ARGB, 4, 4, BGRA, 4, 0)
TESTATOB(ARGB, 4, 4, ABGR, 4, 0)
TESTATOB(ARGB, 4, 4, RGBA, 4, 0)
TESTATOB(ARGB, 4, 4, RAW, 3, 0)
TESTATOB(ARGB, 4, 4, RGB24, 3, 0)
TESTATOB(ARGB, 4, 4, RGB565, 2, 0)
TESTATOB(ARGB, 4, 4, ARGB1555, 2, 0)
TESTATOB(ARGB, 4, 4, ARGB4444, 2, 0)
TESTATOB(ARGB, 4, 4, BayerBGGR, 1, 0)
TESTATOB(ARGB, 4, 4, BayerRGGB, 1, 0)
TESTATOB(ARGB, 4, 4, BayerGBRG, 1, 0)
TESTATOB(ARGB, 4, 4, BayerGRBG, 1, 0)
TESTATOB(ARGB, 4, 4, I400, 1, 2)
TESTATOB(BGRA, 4, 4, ARGB, 4, 0)
TESTATOB(ABGR, 4, 4, ARGB, 4, 0)
TESTATOB(RGBA, 4, 4, ARGB, 4, 0)
TESTATOB(RAW, 3, 3, ARGB, 4, 0)
TESTATOB(RGB24, 3, 3, ARGB, 4, 0)
TESTATOB(RGB565, 2, 2, ARGB, 4, 0)
TESTATOB(ARGB1555, 2, 2, ARGB, 4, 0)
TESTATOB(ARGB4444, 2, 2, ARGB, 4, 0)
TESTATOB(YUY2, 2, 2, ARGB, 4, 0)
TESTATOB(UYVY, 2, 2, ARGB, 4, 0)
TESTATOB(M420, 3 / 2, 1, ARGB, 4, 0)
TESTATOB(BayerBGGR, 1, 1, ARGB, 4, 0)
TESTATOB(BayerRGGB, 1, 1, ARGB, 4, 0)
TESTATOB(BayerGBRG, 1, 1, ARGB, 4, 0)
TESTATOB(BayerGRBG, 1, 1, ARGB, 4, 0)
TESTATOB(I400, 1, 1, ARGB, 4, 0)
TESTATOB(I400, 1, 1, I400, 1, 0)
TESTATOB(I400, 1, 1, I400Mirror, 1, 0)
TESTATOB(Y, 1, 1, ARGB, 4, 0)
TESTATOB(ARGB, 4, 4, ARGBMirror, 4, 0)
// TESTATOBRANDOM defines TEST_F(libyuvTest, <FMT_A>To<FMT_B>_Random).
//
// The test seeds random() from time(NULL) and loops benchmark_iterations_
// times. Each pass:
//   - picks kWidth in [1, 64] and kHeight in [1, 32];
//   - rounds the buffer heights up to multiples of HEIGHT_A / HEIGHT_B and the
//     strides (kWidth * BPP_x) up to multiples of STRIDE_A / STRIDE_B;
//   - fills the source buffer with random bytes and zeroes both destinations;
//   - calls MaskCpuFlags(0) and then FMT_A##To##FMT_B into dst_argb_c
//     (presumably this selects the plain C path - TODO confirm);
//   - walks the destination bytes computing the absolute difference between
//     dst_argb_c and dst_argb_opt.
//
// NOTE(review): this text is a diff excerpt. Both the old (seven-parameter,
// kHeight-sized buffers) and new (nine-parameter, kHeightA/kHeightB-sized
// buffers) lines are present, and the "@ -... +... @" lines mark elided
// content, so the second conversion call and the check against DIFF are not
// visible here.
#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF) \
#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
srandom(time(NULL)); \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (random() & 63) + 1; \
const int kHeight = (random() & 31) + 1; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\
const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\
align_buffer_page_end(src_argb, kStrideA * kHeight); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \
for (int i = 0; i < kStrideA * kHeight; ++i) { \
align_buffer_page_end(src_argb, kStrideA * kHeightA); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
src_argb[i] = (random() & 0xff); \
} \
memset(dst_argb_c, 0, kStrideB * kHeightB); \
memset(dst_argb_opt, 0, kStrideB * kHeightB); \
MaskCpuFlags(0); \
FMT_A##To##FMT_B(src_argb, kStrideA, \
dst_argb_c, kStrideB, \
@ -733,7 +766,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
dst_argb_opt, kStrideB, \
kWidth, kHeight); \
int max_diff = 0; \
for (int i = 0; i < kStrideB * kHeight; ++i) { \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
int abs_diff = \
abs(static_cast<int>(dst_argb_c[i]) - \
static_cast<int>(dst_argb_opt[i])); \
@ -748,33 +781,58 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
} \
}
// Random-size test instantiations using the seven-argument signature
// (FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF).
// NOTE(review): a nine-argument TESTATOBRANDOM signature (with HEIGHT_A and
// HEIGHT_B) also appears in this file; presumably these invocations are the
// pre-change side of the diff - confirm.
TESTATOBRANDOM(ARGB, 4, 4, ARGB, 4, 4, 0)
TESTATOBRANDOM(ARGB, 4, 4, BGRA, 4, 4, 0)
TESTATOBRANDOM(ARGB, 4, 4, ABGR, 4, 4, 0)
TESTATOBRANDOM(ARGB, 4, 4, RGBA, 4, 4, 0)
TESTATOBRANDOM(ARGB, 4, 4, RAW, 3, 3, 0)
TESTATOBRANDOM(ARGB, 4, 4, RGB24, 3, 3, 0)
TESTATOBRANDOM(ARGB, 4, 4, RGB565, 2, 2, 0)
TESTATOBRANDOM(ARGB, 4, 4, ARGB1555, 2, 2, 0)
TESTATOBRANDOM(ARGB, 4, 4, ARGB4444, 2, 2, 0)
TESTATOBRANDOM(ARGB, 4, 4, I400, 1, 1, 2)
// TODO(fbarchard): Implement YUY2
// TESTATOBRANDOM(ARGB, 4, 4, YUY2, 4, 2, 0)
// TESTATOBRANDOM(ARGB, 4, 4, UYVY, 4, 2, 0)
TESTATOBRANDOM(BGRA, 4, 4, ARGB, 4, 4, 0)
TESTATOBRANDOM(ABGR, 4, 4, ARGB, 4, 4, 0)
TESTATOBRANDOM(RGBA, 4, 4, ARGB, 4, 4, 0)
TESTATOBRANDOM(RAW, 3, 3, ARGB, 4, 4, 0)
TESTATOBRANDOM(RGB24, 3, 3, ARGB, 4, 4, 0)
TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4, 4, 0)
TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4, 4, 0)
TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4, 4, 0)
TESTATOBRANDOM(I400, 1, 1, ARGB, 4, 4, 0)
TESTATOBRANDOM(YUY2, 4, 2, ARGB, 4, 4, 0)
TESTATOBRANDOM(UYVY, 4, 2, ARGB, 4, 4, 0)
TESTATOBRANDOM(I400, 1, 1, I400, 1, 1, 0)
TESTATOBRANDOM(I400, 1, 1, I400Mirror, 1, 1, 0)
TESTATOBRANDOM(ARGB, 4, 4, ARGBMirror, 4, 4, 0)
// TESTATOB expands to every test variant for one FMT_A -> FMT_B pair:
// four TESTATOBI instantiations (suffixes _Any, _Unaligned, _Invert and _Opt,
// all run at benchmark_width_) followed by one TESTATOBRANDOM instantiation.
// HEIGHT_A and HEIGHT_B are forwarded only to TESTATOBRANDOM; the TESTATOBI
// calls receive the format, BPP, stride and DIFF arguments.
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,     \
                 HEIGHT_B, DIFF)                                                \
  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, benchmark_width_,  \
            DIFF, _Any, +, 0)                                                   \
  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, benchmark_width_,  \
            DIFF, _Unaligned, +, 1)                                             \
  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, benchmark_width_,  \
            DIFF, _Invert, -, 0)                                                \
  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, benchmark_width_,  \
            DIFF, _Opt, +, 0)                                                   \
  TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B,     \
                 HEIGHT_B, DIFF)
// Test instantiations using the nine-argument TESTATOB. Argument columns:
//   FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF
// In TESTATOBRANDOM, BPP_x scales the width into a row size, STRIDE_x is the
// multiple the stride is rounded up to, and HEIGHT_x is the multiple the
// buffer height is rounded up to. The Bayer formats use a height multiple of
// 2; every other format listed here uses 1.
// DIFF is 2 for ARGB -> YUY2, UYVY and I400 and 0 elsewhere; presumably it is
// the largest per-byte difference tolerated between the two outputs - TODO
// confirm against the part of the macros not shown here.
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 2)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 2)
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0)
TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0)
TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0)
TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
TEST_F(libyuvTest, Test565) {
SIMD_ALIGNED(uint8 orig_pixels[256][4]);