mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
YUY2 and UYVY Unaligned and any versions
TEST=none BUG=none Review URL: https://webrtc-codereview.appspot.com/379009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@168 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
79a06ac5fe
commit
e5f3fd4cc8
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 167
|
||||
Version: 168
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,16 +11,7 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define LIBYUV_VERSION 167
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
#define LIBYUV_VERSION 168
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
|
||||
@ -366,7 +366,7 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ARGBToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -382,7 +382,7 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ARGBToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -428,7 +428,7 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = BGRAToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = BGRAToYAnyRow_SSSE3;
|
||||
ARGBToYRow = BGRAToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = BGRAToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -444,7 +444,7 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = BGRAToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = BGRAToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = BGRAToUVRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = BGRAToUVRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -490,7 +490,7 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ABGRToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ABGRToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ABGRToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ABGRToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -506,7 +506,7 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ABGRToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ABGRToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ABGRToUVRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ABGRToUVRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -561,7 +561,7 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ARGBToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -575,7 +575,7 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ARGBToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
@ -630,7 +630,7 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ARGBToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -644,7 +644,7 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ARGBToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
@ -699,7 +699,7 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ARGBToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -713,7 +713,7 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ARGBToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
@ -768,7 +768,7 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ARGBToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -782,7 +782,7 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ARGBToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
@ -837,7 +837,7 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) && width <= kMaxStride) {
|
||||
ARGBToYRow = ARGBToYAnyRow_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
}
|
||||
@ -851,7 +851,7 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
} else if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 2) && width <= kMaxStride) {
|
||||
ARGBToUVRow = ARGBToUVAnyRow_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
||||
@ -452,22 +452,22 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
void (*I420ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
#if defined(HAS_I420TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
I420ToARGBRow = I420ToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
#elif defined(HAS_I420TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
I420ToARGBRow = I420ToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
I420ToARGBRow = I420ToARGBRow_C;
|
||||
}
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToBayerRow)(const uint8* src_argb,
|
||||
@ -490,7 +490,7 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
|
||||
I420ToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
|
||||
dst_bayer += dst_stride_bayer;
|
||||
src_y += src_stride_y;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
259
source/row.h
259
source/row.h
@ -13,6 +13,11 @@
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define kMaxStride (2048 * 4)
|
||||
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
|
||||
|
||||
@ -34,13 +39,15 @@
|
||||
#define HAS_BGRATOUVROW_SSSE3
|
||||
#define HAS_ABGRTOUVROW_SSSE3
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_FASTCONVERTYTOARGBROW_SSE2
|
||||
#define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
||||
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
|
||||
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
|
||||
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
|
||||
#define HAS_YTOARGBROW_SSE2
|
||||
#define HAS_I420TOARGBROW_SSSE3
|
||||
#define HAS_I420TOBGRAROW_SSSE3
|
||||
#define HAS_I420TOABGRROW_SSSE3
|
||||
#define HAS_I444TOARGBROW_SSSE3
|
||||
#define HAS_MIRRORROW_SSSE3
|
||||
#define HAS_MIRRORROW_SSE2
|
||||
#define HAS_YUY2TOI420ROW_SSE2
|
||||
#define HAS_UYVYTOI420ROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on Windows platforms
|
||||
@ -48,7 +55,6 @@
|
||||
#define HAS_RGB565TOARGBROW_SSE2
|
||||
#define HAS_ARGB1555TOARGBROW_SSE2
|
||||
#define HAS_ARGB4444TOARGBROW_SSE2
|
||||
|
||||
#define HAS_ARGBTORGB24ROW_SSSE3
|
||||
#define HAS_ARGBTORAWROW_SSSE3
|
||||
#define HAS_ARGBTORGB565ROW_SSE2
|
||||
@ -59,14 +65,9 @@
|
||||
// The following are available on Neon platforms
|
||||
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_FASTCONVERTYUVTOARGBROW_NEON
|
||||
#define HAS_FASTCONVERTYUVTOBGRAROW_NEON
|
||||
#define HAS_FASTCONVERTYUVTOABGRROW_NEON
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#define HAS_I420TOARGBROW_NEON
|
||||
#define HAS_I420TOBGRAROW_NEON
|
||||
#define HAS_I420TOABGRROW_NEON
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
@ -81,21 +82,21 @@ typedef unsigned char __attribute__((vector_size(16))) uvec8;
|
||||
typedef signed short __attribute__((vector_size(16))) vec16;
|
||||
#endif
|
||||
|
||||
void FastConvertYUVToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void FastConvertYUVToBGRARow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToBGRARow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToABGRRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
@ -164,114 +165,144 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
|
||||
void FastConvertYUVToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToBGRARow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToBGRARow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToABGRRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToABGRRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I444ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYToARGBRow_C(const uint8* y_buf,
|
||||
void YToARGBRow_C(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void YToARGBRow_SSE2(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
// 'Any' wrappers use memcpy()
|
||||
void FastConvertYUVToARGBAnyRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToARGBRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToBGRAAnyRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToBGRARow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToABGRAnyRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToABGRRow_Any_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void ARGBToRGB24AnyRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRAWAnyRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRGB565AnyRow_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB1555AnyRow_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB4444AnyRow_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
|
||||
void ARGBToYAnyRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void BGRAToYAnyRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void ABGRToYAnyRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void ARGBToUVAnyRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void ABGRToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVAnyRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ABGRToUVAnyRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
|
||||
void FastConvertYUVToARGBAnyRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToARGBRow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToBGRAAnyRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToBGRARow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void FastConvertYUVToABGRAnyRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
void I420ToABGRRow_Any_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
|
||||
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
|
||||
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_y, int pix);
|
||||
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
||||
uint8* dst_y, int pix);
|
||||
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_y, int pix);
|
||||
|
||||
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
||||
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_y, int pix);
|
||||
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
||||
uint8* dst_y, int pix);
|
||||
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_y, int pix);
|
||||
|
||||
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
|
||||
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
|
||||
|
||||
void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
|
||||
void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -271,7 +271,7 @@ static __inline uint32 Clip(int32 val) {
|
||||
}
|
||||
|
||||
static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
|
||||
int ashift, int rshift, int gshift, int bshift) {
|
||||
int ashift, int rshift, int gshift, int bshift) {
|
||||
int32 y1 = (static_cast<int32>(y) - 16) * YG;
|
||||
uint32 b = Clip(static_cast<int32>((u * UB + v * VB) - (BB) + y1) >> 6);
|
||||
uint32 g = Clip(static_cast<int32>((u * UG + v * VG) - (BG) + y1) >> 6);
|
||||
@ -282,11 +282,11 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
|
||||
(255u << ashift);
|
||||
}
|
||||
|
||||
void FastConvertYUVToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I420ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
|
||||
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0);
|
||||
@ -300,11 +300,11 @@ void FastConvertYUVToARGBRow_C(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
void FastConvertYUVToBGRARow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I420ToBGRARow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24);
|
||||
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24);
|
||||
@ -318,11 +318,11 @@ void FastConvertYUVToBGRARow_C(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
void FastConvertYUVToABGRRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I420ToABGRRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16);
|
||||
YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16);
|
||||
@ -336,11 +336,11 @@ void FastConvertYUVToABGRRow_C(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I444ToARGBRow_C(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0);
|
||||
y_buf += 1;
|
||||
@ -350,9 +350,9 @@ void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
|
||||
}
|
||||
}
|
||||
|
||||
void FastConvertYToARGBRow_C(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void YToARGBRow_C(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
|
||||
y_buf += 1;
|
||||
@ -368,6 +368,51 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
// Filter 2 rows of YUY2 UV's (422) into U and V (420)
|
||||
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
// Output a row of UV values, filtering 2 rows of YUY2
|
||||
for (int x = 0; x < pix; x += 2) {
|
||||
dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
|
||||
dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
|
||||
src_yuy2 += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
}
|
||||
|
||||
void YUY2ToYRow_C(const uint8* src_yuy2,
|
||||
uint8* dst_y, int pix) {
|
||||
// Copy a row of yuy2 Y values
|
||||
for (int x = 0; x < pix; ++x) {
|
||||
dst_y[0] = src_yuy2[0];
|
||||
src_yuy2 += 2;
|
||||
dst_y += 1;
|
||||
}
|
||||
}
|
||||
|
||||
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
// Copy a row of uyvy UV values
|
||||
for (int x = 0; x < pix; x += 2) {
|
||||
dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
|
||||
dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
|
||||
src_uyvy += 4;
|
||||
dst_u += 1;
|
||||
dst_v += 1;
|
||||
}
|
||||
}
|
||||
|
||||
void UYVYToYRow_C(const uint8* src_uyvy,
|
||||
uint8* dst_y, int pix) {
|
||||
// Copy a row of uyvy Y values
|
||||
for (int x = 0; x < pix; ++x) {
|
||||
dst_y[0] = src_uyvy[1];
|
||||
src_uyvy += 2;
|
||||
dst_y += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Wrappers to handle odd sizes/alignments
|
||||
#define MAKEYUVANY(NAMEANY, NAME) \
|
||||
void NAMEANY(const uint8* y_buf, \
|
||||
@ -380,15 +425,15 @@ void NAMEANY(const uint8* y_buf, \
|
||||
memcpy(rgb_buf, row, width << 2); \
|
||||
}
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
MAKEYUVANY(FastConvertYUVToARGBAnyRow_SSSE3, FastConvertYUVToARGBRow_SSSE3)
|
||||
MAKEYUVANY(FastConvertYUVToBGRAAnyRow_SSSE3, FastConvertYUVToBGRARow_SSSE3)
|
||||
MAKEYUVANY(FastConvertYUVToABGRAnyRow_SSSE3, FastConvertYUVToABGRRow_SSSE3)
|
||||
#if defined(HAS_I420TOARGBROW_SSSE3)
|
||||
MAKEYUVANY(I420ToARGBRow_Any_SSSE3, I420ToARGBRow_SSSE3)
|
||||
MAKEYUVANY(I420ToBGRARow_Any_SSSE3, I420ToBGRARow_SSSE3)
|
||||
MAKEYUVANY(I420ToABGRRow_Any_SSSE3, I420ToABGRRow_SSSE3)
|
||||
#endif
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
MAKEYUVANY(FastConvertYUVToARGBAnyRow_NEON, FastConvertYUVToARGBRow_NEON)
|
||||
MAKEYUVANY(FastConvertYUVToBGRAAnyRow_NEON, FastConvertYUVToBGRARow_NEON)
|
||||
MAKEYUVANY(FastConvertYUVToABGRAnyRow_NEON, FastConvertYUVToABGRRow_NEON)
|
||||
#if defined(HAS_I420TOARGBROW_NEON)
|
||||
MAKEYUVANY(I420ToARGBRow_Any_NEON, I420ToARGBRow_NEON)
|
||||
MAKEYUVANY(I420ToBGRARow_Any_NEON, I420ToBGRARow_NEON)
|
||||
MAKEYUVANY(I420ToABGRRow_Any_NEON, I420ToABGRRow_NEON)
|
||||
#endif
|
||||
|
||||
#define MAKEYUVANYRGB(NAMEANY, ARGBTORGB, BPP) \
|
||||
@ -401,27 +446,29 @@ void NAMEANY(const uint8* argb_buf, \
|
||||
}
|
||||
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
|
||||
MAKEYUVANYRGB(ARGBToRGB24AnyRow_SSSE3, ARGBToRGB24Row_SSSE3, 3)
|
||||
MAKEYUVANYRGB(ARGBToRAWAnyRow_SSSE3, ARGBToRAWRow_SSSE3, 3)
|
||||
MAKEYUVANYRGB(ARGBToRGB565AnyRow_SSE2, ARGBToRGB565Row_SSE2, 2)
|
||||
MAKEYUVANYRGB(ARGBToARGB1555AnyRow_SSE2, ARGBToARGB1555Row_SSE2, 2)
|
||||
MAKEYUVANYRGB(ARGBToARGB4444AnyRow_SSE2, ARGBToARGB4444Row_SSE2, 2)
|
||||
MAKEYUVANYRGB(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 3)
|
||||
MAKEYUVANYRGB(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 3)
|
||||
MAKEYUVANYRGB(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 2)
|
||||
MAKEYUVANYRGB(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 2)
|
||||
MAKEYUVANYRGB(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 2)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
#define MAKEARGBTOYANY(NAMEANY, ARGBTOY) \
|
||||
#define MAKEANYTOYANY(NAMEANY, ARGBTOY) \
|
||||
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]); \
|
||||
ARGBTOY(src_argb, row, width); \
|
||||
memcpy(dst_y, row, width); \
|
||||
}
|
||||
|
||||
MAKEARGBTOYANY(ARGBToYAnyRow_SSSE3, ARGBToYRow_Unaligned_SSSE3)
|
||||
MAKEARGBTOYANY(BGRAToYAnyRow_SSSE3, BGRAToYRow_Unaligned_SSSE3)
|
||||
MAKEARGBTOYANY(ABGRToYAnyRow_SSSE3, ABGRToYRow_Unaligned_SSSE3)
|
||||
MAKEANYTOYANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3)
|
||||
MAKEANYTOYANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3)
|
||||
MAKEANYTOYANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3)
|
||||
MAKEANYTOYANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2)
|
||||
MAKEANYTOYANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2)
|
||||
|
||||
#define MAKEARGBTOUVANY(NAMEANY, ARGBTOUV) \
|
||||
#define MAKEANYTOUVANY(NAMEANY, ARGBTOUV) \
|
||||
void NAMEANY(const uint8* src_argb0, int src_stride_argb, \
|
||||
uint8* dst_u, uint8* dst_v, int width) { \
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]); \
|
||||
@ -431,9 +478,11 @@ MAKEARGBTOYANY(ABGRToYAnyRow_SSSE3, ABGRToYRow_Unaligned_SSSE3)
|
||||
memcpy(dst_v, row + kMaxStride, halfwidth); \
|
||||
}
|
||||
|
||||
MAKEARGBTOUVANY(ARGBToUVAnyRow_SSSE3, ARGBToUVRow_Unaligned_SSSE3)
|
||||
MAKEARGBTOUVANY(BGRAToUVAnyRow_SSSE3, BGRAToUVRow_Unaligned_SSSE3)
|
||||
MAKEARGBTOUVANY(ABGRToUVAnyRow_SSSE3, ABGRToUVRow_Unaligned_SSSE3)
|
||||
MAKEANYTOUVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3)
|
||||
MAKEANYTOUVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3)
|
||||
MAKEANYTOUVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3)
|
||||
MAKEANYTOUVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2)
|
||||
MAKEANYTOUVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -55,19 +55,19 @@ extern "C" {
|
||||
"vtrn.u8 d22, d23 \n" \
|
||||
"vtrn.u8 d16, d17 \n" \
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) || \
|
||||
defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON) || \
|
||||
defined(HAS_FASTCONVERTYUVTOABGRROW_NEON)
|
||||
#if defined(HAS_I420TOARGBROW_NEON) || \
|
||||
defined(HAS_I420TOBGRAROW_NEON) || \
|
||||
defined(HAS_I420TOABGRROW_NEON)
|
||||
static const vec8 kUVToRB[8] = { 127, 127, 127, 127, 102, 102, 102, 102 };
|
||||
static const vec8 kUVToG[8] = { -25, -25, -25, -25, -52, -52, -52, -52 };
|
||||
#endif
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
void FastConvertYUVToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
#if defined(HAS_I420TOARGBROW_NEON)
|
||||
void I420ToARGBRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vld1.u8 {d24}, [%5] \n"
|
||||
"vld1.u8 {d25}, [%6] \n"
|
||||
@ -94,12 +94,12 @@ YUVTORGB
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON)
|
||||
void FastConvertYUVToBGRARow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
#if defined(HAS_I420TOBGRAROW_NEON)
|
||||
void I420ToBGRARow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vld1.u8 {d24}, [%5] \n"
|
||||
"vld1.u8 {d25}, [%6] \n"
|
||||
@ -127,12 +127,12 @@ YUVTORGB
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOABGRROW_NEON)
|
||||
void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
#if defined(HAS_I420TOABGRROW_NEON)
|
||||
void I420ToABGRRow_NEON(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vld1.u8 {d24}, [%5] \n"
|
||||
"vld1.u8 {d25}, [%6] \n"
|
||||
|
||||
1173
source/row_posix.cc
1173
source/row_posix.cc
File diff suppressed because it is too large
Load Diff
@ -1174,7 +1174,7 @@ __asm {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
|
||||
#ifdef HAS_I420TOARGBROW_SSSE3
|
||||
|
||||
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
|
||||
|
||||
@ -1242,11 +1242,11 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I420ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -1282,11 +1282,11 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I420ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -1322,11 +1322,11 @@ void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I420ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -1362,11 +1362,11 @@ void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -1427,11 +1427,11 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
|
||||
#ifdef HAS_YTOARGBROW_SSE2
|
||||
__declspec(naked)
|
||||
void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
void YToARGBRow_SSE2(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) {
|
||||
__asm {
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0xff000000
|
||||
pslld xmm4, 24
|
||||
@ -1529,6 +1529,277 @@ __asm {
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_YUY2TOI420ROW_SSE2
|
||||
// Extracts the Y bytes from a row of packed YUY2 using SSE2.
// Each loop iteration reads 32 bytes from src_yuy2 and writes 16 bytes to
// dst_y. Both pointers are accessed with movdqa, so they must be 16-byte
// aligned. pix is reduced by 16 per iteration; the loop body always runs at
// least once and repeats while the remaining count is above zero.
// Naked function: arguments are read directly from the stack through esp.
__declspec(naked)
|
||||
void YUY2ToYRow_SSE2(const uint8* src_yuy2,
|
||||
uint8* dst_y, int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_yuy2
|
||||
mov edx, [esp + 8] // dst_y
|
||||
mov ecx, [esp + 12] // pix
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||
psrlw xmm5, 8
|
||||
|
||||
convertloop:
|
||||
// Load 32 source bytes (two 16-byte aligned loads).
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
pand xmm0, xmm5 // even bytes are Y
|
||||
pand xmm1, xmm5
|
||||
// Narrow the two registers of 16-bit words into 16 Y bytes.
packuswb xmm0, xmm1
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
// lea above leaves flags untouched; ja tests the result of this sub.
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two rows of packed YUY2 using
// SSE2. The second source row is located at src_yuy2 + stride_yuy2; the two
// rows are averaged bytewise with pavgb before the chroma bytes are split.
// Each loop iteration reads 32 bytes from each source row and writes 8 bytes
// to dst_u and 8 bytes to dst_v. Source rows are read with movdqa, so
// src_yuy2 and src_yuy2 + stride_yuy2 must be 16-byte aligned. pix is
// reduced by 16 per iteration; the loop body always runs at least once.
// Naked function: arguments are read directly from the stack through esp
// (offset by 8 for the two saved registers).
__declspec(naked)
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
                      uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]    // src_yuy2
    mov        esi, [esp + 8 + 8]    // stride_yuy2
    mov        edx, [esp + 8 + 12]   // dst_u
    mov        edi, [esp + 8 + 16]   // dst_v
    mov        ecx, [esp + 8 + 20]   // pix
    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u, so one pointer advances both

  convertloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + esi]
    movdqa     xmm3, [eax + esi + 16]
    lea        eax,  [eax + 32]
    pavgb      xmm0, xmm2            // average the two source rows
    pavgb      xmm1, xmm3
    psrlw      xmm0, 8               // YUYV -> UVUV
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0
    movq       qword ptr [edx + edi], xmm1
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||
|
||||
// Extracts the Y bytes from a row of packed YUY2 using SSE2, without
// alignment requirements: all loads and stores use movdqu.
// Each loop iteration reads 32 bytes from src_yuy2 and writes 16 bytes to
// dst_y. pix is reduced by 16 per iteration; the loop body always runs at
// least once and repeats while the remaining count is above zero.
// Naked function: arguments are read directly from the stack through esp.
__declspec(naked)
|
||||
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
||||
uint8* dst_y, int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_yuy2
|
||||
mov edx, [esp + 8] // dst_y
|
||||
mov ecx, [esp + 12] // pix
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||
psrlw xmm5, 8
|
||||
|
||||
convertloop:
|
||||
// Load 32 source bytes (two 16-byte unaligned loads).
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
pand xmm0, xmm5 // even bytes are Y
|
||||
pand xmm1, xmm5
|
||||
// Narrow the two registers of 16-bit words into 16 Y bytes.
packuswb xmm0, xmm1
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
// lea above leaves flags untouched; ja tests the result of this sub.
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two rows of packed YUY2 using
// SSE2, without source alignment requirements (rows are read with movdqu).
// The second source row is located at src_yuy2 + stride_yuy2; the two rows
// are averaged bytewise with pavgb before the chroma bytes are split.
// Each loop iteration reads 32 bytes from each source row and writes 8 bytes
// to dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration; the
// loop body always runs at least once.
// Naked function: arguments are read directly from the stack through esp
// (offset by 8 for the two saved registers).
__declspec(naked)
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
                                uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]    // src_yuy2
    mov        esi, [esp + 8 + 8]    // stride_yuy2
    mov        edx, [esp + 8 + 12]   // dst_u
    mov        edi, [esp + 8 + 16]   // dst_v
    mov        ecx, [esp + 8 + 20]   // pix
    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u, so one pointer advances both

  convertloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + esi]
    movdqu     xmm3, [eax + esi + 16]
    lea        eax,  [eax + 32]
    pavgb      xmm0, xmm2            // average the two source rows
    pavgb      xmm1, xmm3
    psrlw      xmm0, 8               // YUYV -> UVUV
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0            // register-to-register copy
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0
    movq       qword ptr [edx + edi], xmm1
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||
|
||||
// Extracts the Y bytes from a row of packed UYVY using SSE2.
// Each loop iteration reads 32 bytes from src_uyvy and writes 16 bytes to
// dst_y. Both pointers are accessed with movdqa, so they must be 16-byte
// aligned. pix is reduced by 16 per iteration; the loop body always runs at
// least once and repeats while the remaining count is above zero.
// Naked function: arguments are read directly from the stack through esp.
__declspec(naked)
|
||||
void UYVYToYRow_SSE2(const uint8* src_uyvy,
|
||||
uint8* dst_y, int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_uyvy
|
||||
mov edx, [esp + 8] // dst_y
|
||||
mov ecx, [esp + 12] // pix
|
||||
|
||||
convertloop:
|
||||
// Load 32 source bytes (two 16-byte aligned loads).
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
psrlw xmm0, 8 // odd bytes are Y
|
||||
psrlw xmm1, 8
|
||||
// Narrow the two registers of 16-bit words into 16 Y bytes.
packuswb xmm0, xmm1
|
||||
movdqa [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
// lea above leaves flags untouched; ja tests the result of this sub.
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two rows of packed UYVY using
// SSE2. The second source row is located at src_uyvy + stride_uyvy; the two
// rows are averaged bytewise with pavgb before the chroma bytes are split.
// Each loop iteration reads 32 bytes from each source row and writes 8 bytes
// to dst_u and 8 bytes to dst_v. Source rows are read with movdqa, so
// src_uyvy and src_uyvy + stride_uyvy must be 16-byte aligned. pix is
// reduced by 16 per iteration; the loop body always runs at least once.
// Naked function: arguments are read directly from the stack through esp
// (offset by 8 for the two saved registers).
__declspec(naked)
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
                      uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]    // src_uyvy
    mov        esi, [esp + 8 + 8]    // stride_uyvy
    mov        edx, [esp + 8 + 12]   // dst_u
    mov        edi, [esp + 8 + 16]   // dst_v
    mov        ecx, [esp + 8 + 20]   // pix
    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u, so one pointer advances both

  convertloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + esi]
    movdqa     xmm3, [eax + esi + 16]
    lea        eax,  [eax + 32]
    pavgb      xmm0, xmm2            // average the two source rows
    pavgb      xmm1, xmm3
    pand       xmm0, xmm5            // UYVY -> UVUV
    pand       xmm1, xmm5
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0
    movq       qword ptr [edx + edi], xmm1
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||
|
||||
// Extracts the Y bytes from a row of packed UYVY using SSE2, without
// alignment requirements: all loads and stores use movdqu.
// Each loop iteration reads 32 bytes from src_uyvy and writes 16 bytes to
// dst_y. pix is reduced by 16 per iteration; the loop body always runs at
// least once and repeats while the remaining count is above zero.
// Naked function: arguments are read directly from the stack through esp.
__declspec(naked)
|
||||
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
||||
uint8* dst_y, int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_uyvy
|
||||
mov edx, [esp + 8] // dst_y
|
||||
mov ecx, [esp + 12] // pix
|
||||
|
||||
convertloop:
|
||||
// Load 32 source bytes (two 16-byte unaligned loads).
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
psrlw xmm0, 8 // odd bytes are Y
|
||||
psrlw xmm1, 8
|
||||
// Narrow the two registers of 16-bit words into 16 Y bytes.
packuswb xmm0, xmm1
|
||||
movdqu [edx], xmm0
|
||||
lea edx, [edx + 16]
|
||||
// lea above leaves flags untouched; ja tests the result of this sub.
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Produces one row of U and one row of V from two rows of packed UYVY using
// SSE2, without source alignment requirements (rows are read with movdqu).
// The second source row is located at src_uyvy + stride_uyvy; the two rows
// are averaged bytewise with pavgb before the chroma bytes are split.
// Each loop iteration reads 32 bytes from each source row and writes 8 bytes
// to dst_u and 8 bytes to dst_v. pix is reduced by 16 per iteration; the
// loop body always runs at least once.
// Naked function: arguments are read directly from the stack through esp
// (offset by 8 for the two saved registers).
__declspec(naked)
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
                                uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]    // src_uyvy
    mov        esi, [esp + 8 + 8]    // stride_uyvy
    mov        edx, [esp + 8 + 12]   // dst_u
    mov        edi, [esp + 8 + 16]   // dst_v
    mov        ecx, [esp + 8 + 20]   // pix
    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u, so one pointer advances both

  convertloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + esi]
    movdqu     xmm3, [eax + esi + 16]
    lea        eax,  [eax + 32]
    pavgb      xmm0, xmm2            // average the two source rows
    pavgb      xmm1, xmm3
    pand       xmm0, xmm5            // UYVY -> UVUV
    pand       xmm1, xmm5
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0            // register-to-register copy
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0
    movq       qword ptr [edx + edi], xmm1
    lea        edx, [edx + 8]
    sub        ecx, 16
    ja         convertloop

    pop        edi
    pop        esi
    ret
  }
}
|
||||
#endif // HAS_YUY2TOI420ROW_SSE2
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user