mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Libyuv MIPS DSPR2 optimizations.
Optimized functions: I444ToARGBRow_DSPR2 I422ToARGB4444Row_DSPR2 I422ToARGB1555Row_DSPR2 NV12ToARGBRow_DSPR2 BGRAToUVRow_DSPR2 BGRAToYRow_DSPR2 ABGRToUVRow_DSPR2 ARGBToYRow_DSPR2 ABGRToYRow_DSPR2 RGBAToUVRow_DSPR2 RGBAToYRow_DSPR2 ARGBToUVRow_DSPR2 RGB24ToARGBRow_DSPR2 RAWToARGBRow_DSPR2 RGB565ToARGBRow_DSPR2 ARGB1555ToARGBRow_DSPR2 ARGB4444ToARGBRow_DSPR2 ScaleAddRow_DSPR2 Bug-fixes in functions: ScaleRowDown2_DSPR2 ScaleRowDown4_DSPR2 BUG= Review-Url: https://codereview.chromium.org/2626123003 .
This commit is contained in:
parent
288bfbefb5
commit
000d2fa91a
@ -364,6 +364,23 @@ extern "C" {
|
||||
#define HAS_MIRRORROW_DSPR2
|
||||
#define HAS_MIRRORUVROW_DSPR2
|
||||
#define HAS_SPLITUVROW_DSPR2
|
||||
#define HAS_RGB24TOARGBROW_DSPR2
|
||||
#define HAS_RAWTOARGBROW_DSPR2
|
||||
#define HAS_RGB565TOARGBROW_DSPR2
|
||||
#define HAS_ARGB1555TOARGBROW_DSPR2
|
||||
#define HAS_ARGB4444TOARGBROW_DSPR2
|
||||
#define HAS_I444TOARGBROW_DSPR2
|
||||
#define HAS_I422TOARGB4444ROW_DSPR2
|
||||
#define HAS_I422TOARGB1555ROW_DSPR2
|
||||
#define HAS_NV12TOARGBROW_DSPR2
|
||||
#define HAS_BGRATOUVROW_DSPR2
|
||||
#define HAS_BGRATOYROW_DSPR2
|
||||
#define HAS_ABGRTOUVROW_DSPR2
|
||||
#define HAS_ARGBTOYROW_DSPR2
|
||||
#define HAS_ABGRTOYROW_DSPR2
|
||||
#define HAS_RGBATOUVROW_DSPR2
|
||||
#define HAS_RGBATOYROW_DSPR2
|
||||
#define HAS_ARGBTOUVROW_DSPR2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -660,6 +677,30 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I444ToARGBRow_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGB4444Row_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb4444,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGB1555Row_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb1555,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_DSPR2(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I422ToARGBRow_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
@ -789,6 +830,30 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
|
||||
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
|
||||
void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
|
||||
void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
|
||||
void BGRAToUVRow_DSPR2(const uint8* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
|
||||
void ABGRToUVRow_DSPR2(const uint8* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
|
||||
void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
|
||||
void RGBAToUVRow_DSPR2(const uint8* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
|
||||
void ARGBToUVRow_DSPR2(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
|
||||
void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
|
||||
void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
|
||||
@ -817,6 +882,10 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
|
||||
void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555,
|
||||
uint8* dst_y,
|
||||
int width);
|
||||
void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
|
||||
void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
|
||||
void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
|
||||
void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
|
||||
void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_y,
|
||||
int width);
|
||||
@ -955,6 +1024,36 @@ void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_Any_DSPR2(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_C(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJRow_C(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_C(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
@ -1251,6 +1350,15 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
|
||||
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width);
|
||||
void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
|
||||
void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width);
|
||||
void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
@ -1299,6 +1407,20 @@ void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555,
|
||||
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
|
||||
void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
|
||||
void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
@ -2042,12 +2164,47 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I444ToARGBRow_Any_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_Any_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I411ToARGBRow_Any_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_Any_DSPR2(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I422ToARGBRow_DSPR2(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -101,6 +101,7 @@ extern "C" {
|
||||
#define HAS_SCALEROWDOWN4_DSPR2
|
||||
#define HAS_SCALEROWDOWN34_DSPR2
|
||||
#define HAS_SCALEROWDOWN38_DSPR2
|
||||
#define HAS_SCALEADDROW_DSPR2
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
@ -846,6 +847,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
|
||||
uint16* dst_ptr,
|
||||
int src_width);
|
||||
|
||||
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
@ -885,9 +890,9 @@ void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
|
||||
uint8_t* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width);
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8_t* dst,
|
||||
|
||||
@ -579,6 +579,14 @@ int ARGBToI420(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_MSA;
|
||||
@ -587,6 +595,14 @@ int ARGBToI420(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_MSA;
|
||||
@ -664,6 +680,22 @@ int BGRAToI420(const uint8* src_bgra,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_BGRATOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
BGRAToYRow = BGRAToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
BGRAToYRow = BGRAToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_BGRATOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
BGRAToUVRow = BGRAToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
|
||||
@ -733,6 +765,22 @@ int ABGRToI420(const uint8* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ABGRToYRow = ABGRToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToUVRow = ABGRToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
|
||||
@ -802,6 +850,22 @@ int RGBAToI420(const uint8* src_rgba,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RGBAToYRow = RGBAToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGBAToYRow = RGBAToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGBAToUVRow = RGBAToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
|
||||
@ -1014,6 +1078,14 @@ int RAWToI420(const uint8* src_raw,
|
||||
ARGBToYRow = ARGBToYRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
RAWToARGBRow = RAWToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate 2 rows of ARGB.
|
||||
@ -1142,6 +1214,14 @@ int RGB565ToI420(const uint8* src_rgb565,
|
||||
ARGBToYRow = ARGBToYRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB565TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate 2 rows of ARGB.
|
||||
|
||||
@ -485,6 +485,14 @@ static int I444ToARGBMatrix(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I444TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I444ToARGBRow = I444ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
|
||||
@ -946,6 +954,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToARGBRow(src_rgb24, dst_argb, width);
|
||||
@ -997,6 +1013,14 @@ int RAWToARGB(const uint8* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToARGBRow = RAWToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RAWToARGBRow(src_raw, dst_argb, width);
|
||||
@ -1056,6 +1080,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB565TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB565ToARGBRow(src_rgb565, dst_argb, width);
|
||||
@ -1115,6 +1147,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
|
||||
@ -1174,6 +1214,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB4444TOARGBROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
|
||||
@ -1238,6 +1286,14 @@ int NV12ToARGB(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
|
||||
@ -1354,6 +1410,14 @@ int M420ToARGB(const uint8* src_m420,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
|
||||
|
||||
@ -708,6 +708,14 @@ int I420ToARGB1555(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGB1555ROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGB1555ROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
|
||||
@ -781,6 +789,14 @@ int I420ToARGB4444(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGB4444ROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGB4444ROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
|
||||
|
||||
@ -100,6 +100,14 @@ int ARGBToI444(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_MSA;
|
||||
@ -189,6 +197,23 @@ int ARGBToI422(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTOYROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_MSA;
|
||||
@ -318,6 +343,22 @@ int ARGBToNV12(const uint8* src_argb,
|
||||
MergeUVRow_ = MergeUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate a rows of uv.
|
||||
@ -445,6 +486,22 @@ int ARGBToNV21(const uint8* src_argb,
|
||||
MergeUVRow_ = MergeUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate a rows of uv.
|
||||
@ -570,6 +627,22 @@ int ARGBToYUY2(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOYUY2ROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
|
||||
@ -698,6 +771,22 @@ int ARGBToUYVY(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOUYVYROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
|
||||
@ -775,6 +864,14 @@ int ARGBToI400(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_MSA;
|
||||
|
||||
@ -167,6 +167,12 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_DSPR2
|
||||
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
|
||||
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_MSA
|
||||
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
|
||||
@ -291,6 +297,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#ifdef HAS_NV12TOARGBROW_NEON
|
||||
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_DSPR2
|
||||
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_SSSE3
|
||||
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
@ -484,6 +493,33 @@ ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
|
||||
#ifdef HAS_ARGB4444TOARGBROW_NEON
|
||||
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOARGBROW_DSPR2
|
||||
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOARGBROW_DSPR2
|
||||
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOARGBROW_DSPR2
|
||||
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
|
||||
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
|
||||
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOYROW_DSPR2
|
||||
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_DSPR2
|
||||
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYROW_DSPR2
|
||||
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYROW_DSPR2
|
||||
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOARGBROW_MSA
|
||||
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
|
||||
#endif
|
||||
@ -904,6 +940,18 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
|
||||
#ifdef HAS_UYVYTOUVROW_NEON
|
||||
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOUVROW_DSPR2
|
||||
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOUVROW_DSPR2
|
||||
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOUVROW_DSPR2
|
||||
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVROW_DSPR2
|
||||
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUVROW_MSA
|
||||
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
|
||||
#endif
|
||||
|
||||
@ -202,8 +202,9 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
|
||||
uint8 b1 = src_argb[4] >> 3;
|
||||
uint8 g1 = src_argb[5] >> 2;
|
||||
uint8 r1 = src_argb[6] >> 3;
|
||||
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
|
||||
(r1 << 27));
|
||||
WRITEWORD(
|
||||
dst_rgb,
|
||||
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
|
||||
dst_rgb += 4;
|
||||
src_argb += 8;
|
||||
}
|
||||
@ -237,8 +238,9 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
|
||||
uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
|
||||
uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
|
||||
uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
|
||||
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
|
||||
(r1 << 27));
|
||||
WRITEWORD(
|
||||
dst_rgb,
|
||||
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
|
||||
dst_rgb += 4;
|
||||
src_argb += 8;
|
||||
}
|
||||
|
||||
1190
source/row_mips.cc
1190
source/row_mips.cc
File diff suppressed because it is too large
Load Diff
@ -2032,7 +2032,7 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
|
||||
__asm vpsraw ymm2, ymm2, 6 \
|
||||
__asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
|
||||
__asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
|
||||
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
|
||||
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
|
||||
}
|
||||
|
||||
// Store 16 ARGB values.
|
||||
|
||||
@ -894,6 +894,14 @@ static void ScalePlaneBox(int src_width,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEADDROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
ScaleAddRow = ScaleAddRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(src_width, 16)) {
|
||||
ScaleAddRow = ScaleAddRow_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
int boxheight;
|
||||
|
||||
@ -421,6 +421,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
|
||||
#ifdef HAS_SCALEADDROW_MSA
|
||||
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_DSPR2
|
||||
SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
|
||||
#endif
|
||||
#undef SAANY
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -42,10 +42,10 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
|
||||
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
// TODO(fbarchard): Use odd pixels instead of even.
|
||||
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
|
||||
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
|
||||
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
|
||||
"precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
|
||||
"precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
|
||||
"precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
|
||||
"precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sw $t8, 0(%[dst]) \n"
|
||||
@ -61,7 +61,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"lbu $t0, 0(%[src_ptr]) \n"
|
||||
"lbu $t0, 1(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 2 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
@ -198,8 +198,8 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
|
||||
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
|
||||
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
|
||||
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
|
||||
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
|
||||
"precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
|
||||
"precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sw $t1, 0(%[dst]) \n"
|
||||
@ -213,7 +213,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"lbu $t1, 0(%[src_ptr]) \n"
|
||||
"lbu $t1, 2(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sb $t1, 0(%[dst]) \n"
|
||||
@ -615,6 +615,51 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
|
||||
}
|
||||
|
||||
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
int x;
|
||||
for (x = 0; x < ((src_width - 1)); x += 8) {
|
||||
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
|
||||
uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"lw %[tmp_t5], 0(%[src_ptr]) \n"
|
||||
"lw %[tmp_t6], 4(%[src_ptr]) \n"
|
||||
"lw %[tmp_t1], 0(%[dst_ptr]) \n"
|
||||
"lw %[tmp_t2], 4(%[dst_ptr]) \n"
|
||||
"lw %[tmp_t3], 8(%[dst_ptr]) \n"
|
||||
"lw %[tmp_t4], 12(%[dst_ptr]) \n"
|
||||
"preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n"
|
||||
"preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n"
|
||||
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n"
|
||||
"addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n"
|
||||
"preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n"
|
||||
"preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n"
|
||||
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n"
|
||||
"addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n"
|
||||
"sw %[tmp_t1], 0(%[dst_ptr]) \n"
|
||||
"sw %[tmp_t2], 4(%[dst_ptr]) \n"
|
||||
"sw %[tmp_t3], 8(%[dst_ptr]) \n"
|
||||
"sw %[tmp_t4], 12(%[dst_ptr]) \n"
|
||||
".set pop \n"
|
||||
:
|
||||
[tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3),
|
||||
[tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
|
||||
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr)
|
||||
: [dst_ptr] "r"(dst_ptr));
|
||||
src_ptr += 8;
|
||||
dst_ptr += 8;
|
||||
}
|
||||
|
||||
if ((src_width)&7) {
|
||||
for (x = 0; x < ((src_width - 1) & 7); x += 1) {
|
||||
dst_ptr[0] += src_ptr[0];
|
||||
src_ptr += 1;
|
||||
dst_ptr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -30,107 +30,113 @@ namespace libyuv {
|
||||
|
||||
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
|
||||
|
||||
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
||||
align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
for (int i = 0; i < kHeight; ++i) \
|
||||
for (int j = 0; j < kWidth; ++j) \
|
||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
|
||||
src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
} \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
||||
memset(dst_u_c, 2, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_v_c, 3, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
||||
memset(dst_u_opt, 102, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_v_opt, 103, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
|
||||
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
|
||||
dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
|
||||
static_cast<int>(dst_y_opt[i * kWidth + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_EQ(0, max_diff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_u_c); \
|
||||
free_aligned_buffer_page_end(dst_v_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
free_aligned_buffer_page_end(dst_u_opt); \
|
||||
free_aligned_buffer_page_end(dst_v_opt); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_u); \
|
||||
free_aligned_buffer_page_end(src_v); \
|
||||
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
||||
align_buffer_page_end( \
|
||||
src_u, \
|
||||
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end( \
|
||||
src_v, \
|
||||
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end( \
|
||||
dst_u_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_v_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end( \
|
||||
dst_u_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_v_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
for (int i = 0; i < kHeight; ++i) \
|
||||
for (int j = 0; j < kWidth; ++j) \
|
||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
|
||||
src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
} \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
||||
memset(dst_u_c, 2, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_v_c, 3, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
||||
memset(dst_u_opt, 102, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_v_opt, 103, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
|
||||
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
|
||||
dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
|
||||
static_cast<int>(dst_y_opt[i * kWidth + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_EQ(0, max_diff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_u_c); \
|
||||
free_aligned_buffer_page_end(dst_v_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
free_aligned_buffer_page_end(dst_u_opt); \
|
||||
free_aligned_buffer_page_end(dst_v_opt); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_u); \
|
||||
free_aligned_buffer_page_end(src_v); \
|
||||
}
|
||||
|
||||
#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
@ -166,15 +172,19 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
|
||||
align_buffer_page_end(src_uv, \
|
||||
kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_u_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_v_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_u_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_v_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
uint8* src_u = src_uv + OFF_U; \
|
||||
uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
|
||||
int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
|
||||
@ -278,84 +288,88 @@ TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
|
||||
TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
|
||||
TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
|
||||
|
||||
#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
||||
align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
for (int i = 0; i < kHeight; ++i) \
|
||||
for (int j = 0; j < kWidth; ++j) \
|
||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
|
||||
src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
} \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
||||
memset(dst_uv_c, 2, \
|
||||
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
||||
memset(dst_uv_opt, 102, \
|
||||
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
|
||||
dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
|
||||
dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
|
||||
static_cast<int>(dst_y_opt[i * kWidth + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 1); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = \
|
||||
abs(static_cast<int>( \
|
||||
dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 1); \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_uv_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
free_aligned_buffer_page_end(dst_uv_opt); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_u); \
|
||||
free_aligned_buffer_page_end(src_v); \
|
||||
#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
||||
align_buffer_page_end( \
|
||||
src_u, \
|
||||
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end( \
|
||||
src_v, \
|
||||
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end( \
|
||||
dst_uv_c, \
|
||||
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end( \
|
||||
dst_uv_opt, \
|
||||
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
for (int i = 0; i < kHeight; ++i) \
|
||||
for (int j = 0; j < kWidth; ++j) \
|
||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
|
||||
src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
} \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
||||
memset(dst_uv_c, 2, \
|
||||
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
||||
memset(dst_uv_opt, 102, \
|
||||
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
|
||||
dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
|
||||
dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
|
||||
static_cast<int>(dst_y_opt[i * kWidth + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 1); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = \
|
||||
abs(static_cast<int>( \
|
||||
dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 1); \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_uv_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
free_aligned_buffer_page_end(dst_uv_opt); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_u); \
|
||||
free_aligned_buffer_page_end(src_v); \
|
||||
}
|
||||
|
||||
#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
@ -379,19 +393,24 @@ TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
||||
align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(src_uv, \
|
||||
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_u_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_v_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_u_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end( \
|
||||
dst_v_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
for (int i = 0; i < kHeight; ++i) \
|
||||
for (int j = 0; j < kWidth; ++j) \
|
||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||
@ -1369,10 +1388,12 @@ TEST_F(LibYUVConvertTest, MJPGToI420) {
|
||||
const int kSize = kImageSize + kOff;
|
||||
align_buffer_page_end(orig_pixels, kSize);
|
||||
align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_);
|
||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(benchmark_width_, 2) *
|
||||
SUBSAMPLE(benchmark_height_, 2));
|
||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(benchmark_width_, 2) *
|
||||
SUBSAMPLE(benchmark_height_, 2));
|
||||
align_buffer_page_end(
|
||||
dst_u_opt,
|
||||
SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
|
||||
align_buffer_page_end(
|
||||
dst_v_opt,
|
||||
SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
|
||||
|
||||
// EOI, SOI to make MJPG appear valid.
|
||||
memset(orig_pixels, 0, kSize);
|
||||
@ -1444,16 +1465,20 @@ TEST_F(LibYUVConvertTest, NV12Crop) {
|
||||
uint8* src_uv = src_y + kWidth * kHeight;
|
||||
|
||||
align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
|
||||
align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
|
||||
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
|
||||
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
align_buffer_page_end(
|
||||
dst_u,
|
||||
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
align_buffer_page_end(
|
||||
dst_v,
|
||||
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
|
||||
align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
|
||||
align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
|
||||
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
|
||||
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
align_buffer_page_end(
|
||||
dst_u_2,
|
||||
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
align_buffer_page_end(
|
||||
dst_v_2,
|
||||
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
|
||||
|
||||
for (int i = 0; i < kHeight * kWidth; ++i) {
|
||||
src_y[i] = (fastrand() & 0xff);
|
||||
|
||||
@ -356,17 +356,19 @@ int main(int argc, const char* argv[]) {
|
||||
const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
|
||||
const size_t total_size = y_size + 2 * uv_size; // NOLINT
|
||||
#if defined(_MSC_VER)
|
||||
_fseeki64(file_org, static_cast<__int64>(num_skip_org) *
|
||||
static_cast<__int64>(total_size),
|
||||
SEEK_SET);
|
||||
_fseeki64(
|
||||
file_org,
|
||||
static_cast<__int64>(num_skip_org) * static_cast<__int64>(total_size),
|
||||
SEEK_SET);
|
||||
#else
|
||||
fseek(file_org, num_skip_org * total_size, SEEK_SET);
|
||||
#endif
|
||||
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
|
||||
#if defined(_MSC_VER)
|
||||
_fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) *
|
||||
static_cast<__int64>(total_size),
|
||||
SEEK_SET);
|
||||
_fseeki64(
|
||||
file_rec[cur_rec],
|
||||
static_cast<__int64>(num_skip_rec) * static_cast<__int64>(total_size),
|
||||
SEEK_SET);
|
||||
#else
|
||||
fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET);
|
||||
#endif
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user