mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
clang-tidy applied
Bug: libyuv:886, libyuv:889
Change-Id: I2d14d03c19402381256d3c6d988e0b7307bdffd8
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2800147
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
34bf48e160
commit
60db98b6fa
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1784
|
Version: 1785
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -767,7 +767,7 @@ struct YuvConstants {
|
|||||||
#else
|
#else
|
||||||
// This struct is for Intel color conversion.
|
// This struct is for Intel color conversion.
|
||||||
struct YuvConstants {
|
struct YuvConstants {
|
||||||
#if LIBYUV_UNLIMITED_DATA
|
#if defined(LIBYUV_UNLIMITED_DATA)
|
||||||
uint8_t kUVToB[32];
|
uint8_t kUVToB[32];
|
||||||
uint8_t kUVToG[32];
|
uint8_t kUVToG[32];
|
||||||
uint8_t kUVToR[32];
|
uint8_t kUVToR[32];
|
||||||
@ -1063,11 +1063,11 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
|||||||
void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||||
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||||
void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||||
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
|
||||||
void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||||
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width);
|
||||||
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
|
||||||
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width);
|
||||||
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
||||||
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
||||||
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||||
@ -1262,16 +1262,16 @@ void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
|||||||
void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
||||||
void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
|
void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
|
||||||
|
|
||||||
void ARGBToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void ARGBToYJRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RGBAToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void RGBAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void RGB24ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RGB24ToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
|
void RGB24ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
void RAWToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RAWToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
|
void RAWToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||||
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
||||||
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
||||||
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
|
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
|
||||||
@ -1373,42 +1373,42 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
|
void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
|
void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -1417,7 +1417,7 @@ void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -1440,47 +1440,47 @@ void ARGBToUVRow_Any_MMI(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
|
void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -1689,7 +1689,7 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
|
|||||||
|
|
||||||
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
||||||
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
|
||||||
@ -1705,9 +1705,13 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
|
|||||||
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
|
|
||||||
void RGB24MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
|
void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
|
||||||
void RGB24MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
|
uint8_t* dst_rgb24,
|
||||||
void RGB24MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
|
int width);
|
||||||
|
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
int width);
|
||||||
|
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width);
|
||||||
void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr,
|
void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
@ -1928,23 +1932,23 @@ void MergeARGBRow_NEON(const uint8_t* src_r,
|
|||||||
const uint8_t* src_a,
|
const uint8_t* src_a,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void MergeARGBRow_Any_SSE2(const uint8_t* src_r,
|
void MergeARGBRow_Any_SSE2(const uint8_t* y_buf,
|
||||||
const uint8_t* src_g,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* src_b,
|
const uint8_t* v_buf,
|
||||||
const uint8_t* src_a,
|
const uint8_t* a_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void MergeARGBRow_Any_AVX2(const uint8_t* src_r,
|
void MergeARGBRow_Any_AVX2(const uint8_t* y_buf,
|
||||||
const uint8_t* src_g,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* src_b,
|
const uint8_t* v_buf,
|
||||||
const uint8_t* src_a,
|
const uint8_t* a_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void MergeARGBRow_Any_NEON(const uint8_t* src_r,
|
void MergeARGBRow_Any_NEON(const uint8_t* y_buf,
|
||||||
const uint8_t* src_g,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* src_b,
|
const uint8_t* v_buf,
|
||||||
const uint8_t* src_a,
|
const uint8_t* a_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void SplitARGBRow_C(const uint8_t* src_argb,
|
void SplitARGBRow_C(const uint8_t* src_argb,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
@ -1970,31 +1974,31 @@ void SplitARGBRow_AVX2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
uint8_t* dst_a,
|
uint8_t* dst_a,
|
||||||
int width);
|
int width);
|
||||||
void SplitARGBRow_NEON(const uint8_t* src_argb,
|
void SplitARGBRow_NEON(const uint8_t* src_rgba,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
uint8_t* dst_a,
|
uint8_t* dst_a,
|
||||||
int width);
|
int width);
|
||||||
void SplitARGBRow_Any_SSE2(const uint8_t* src_argb,
|
void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
uint8_t* dst_a,
|
uint8_t* dst_a,
|
||||||
int width);
|
int width);
|
||||||
void SplitARGBRow_Any_SSSE3(const uint8_t* src_argb,
|
void SplitARGBRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
uint8_t* dst_a,
|
uint8_t* dst_a,
|
||||||
int width);
|
int width);
|
||||||
void SplitARGBRow_Any_AVX2(const uint8_t* src_argb,
|
void SplitARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
uint8_t* dst_a,
|
uint8_t* dst_a,
|
||||||
int width);
|
int width);
|
||||||
void SplitARGBRow_Any_NEON(const uint8_t* src_argb,
|
void SplitARGBRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
@ -2020,20 +2024,20 @@ void MergeXRGBRow_NEON(const uint8_t* src_r,
|
|||||||
const uint8_t* src_b,
|
const uint8_t* src_b,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void MergeXRGBRow_Any_SSE2(const uint8_t* src_r,
|
void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf,
|
||||||
const uint8_t* src_g,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* src_b,
|
const uint8_t* v_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void MergeXRGBRow_Any_AVX2(const uint8_t* src_r,
|
void MergeXRGBRow_Any_AVX2(const uint8_t* y_buf,
|
||||||
const uint8_t* src_g,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* src_b,
|
const uint8_t* v_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void MergeXRGBRow_Any_NEON(const uint8_t* src_r,
|
void MergeXRGBRow_Any_NEON(const uint8_t* y_buf,
|
||||||
const uint8_t* src_g,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* src_b,
|
const uint8_t* v_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void SplitXRGBRow_C(const uint8_t* src_argb,
|
void SplitXRGBRow_C(const uint8_t* src_argb,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
@ -2055,27 +2059,27 @@ void SplitXRGBRow_AVX2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
int width);
|
int width);
|
||||||
void SplitXRGBRow_NEON(const uint8_t* src_argb,
|
void SplitXRGBRow_NEON(const uint8_t* src_rgba,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
int width);
|
int width);
|
||||||
void SplitXRGBRow_Any_SSE2(const uint8_t* src_argb,
|
void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
int width);
|
int width);
|
||||||
void SplitXRGBRow_Any_SSSE3(const uint8_t* src_argb,
|
void SplitXRGBRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
int width);
|
int width);
|
||||||
void SplitXRGBRow_Any_AVX2(const uint8_t* src_argb,
|
void SplitXRGBRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
int width);
|
int width);
|
||||||
void SplitXRGBRow_Any_NEON(const uint8_t* src_argb,
|
void SplitXRGBRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
uint8_t* dst_b,
|
uint8_t* dst_b,
|
||||||
@ -2183,74 +2187,74 @@ void MergeXRGB16To8Row_NEON(const uint16_t* src_r,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXR30Row_Any_AVX2(const uint16_t* src_r,
|
void MergeXR30Row_Any_AVX2(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeAR64Row_Any_AVX2(const uint16_t* src_r,
|
void MergeAR64Row_Any_AVX2(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
const uint16_t* src_a,
|
const uint16_t* a_buf,
|
||||||
uint16_t* dst_ar64,
|
uint16_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXR64Row_Any_AVX2(const uint16_t* src_r,
|
void MergeXR64Row_Any_AVX2(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint16_t* dst_ar64,
|
uint16_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeARGB16To8Row_Any_AVX2(const uint16_t* src_r,
|
void MergeARGB16To8Row_Any_AVX2(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
const uint16_t* src_a,
|
const uint16_t* a_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXRGB16To8Row_Any_AVX2(const uint16_t* src_r,
|
void MergeXRGB16To8Row_Any_AVX2(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXR30Row_Any_NEON(const uint16_t* src_r,
|
void MergeXR30Row_Any_NEON(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXR30Row_10_Any_NEON(const uint16_t* src_r,
|
void MergeXR30Row_10_Any_NEON(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeAR64Row_Any_NEON(const uint16_t* src_r,
|
void MergeAR64Row_Any_NEON(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
const uint16_t* src_a,
|
const uint16_t* a_buf,
|
||||||
uint16_t* dst_ar64,
|
uint16_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeARGB16To8Row_Any_NEON(const uint16_t* src_r,
|
void MergeARGB16To8Row_Any_NEON(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
const uint16_t* src_a,
|
const uint16_t* a_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXR64Row_Any_NEON(const uint16_t* src_r,
|
void MergeXR64Row_Any_NEON(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint16_t* dst_ar64,
|
uint16_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
void MergeXRGB16To8Row_Any_NEON(const uint16_t* src_r,
|
void MergeXRGB16To8Row_Any_NEON(const uint16_t* r_buf,
|
||||||
const uint16_t* src_g,
|
const uint16_t* g_buf,
|
||||||
const uint16_t* src_b,
|
const uint16_t* b_buf,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_ptr,
|
||||||
int depth,
|
int depth,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
@ -2314,16 +2318,16 @@ void MultiplyRow_16_AVX2(const uint16_t* src_y,
|
|||||||
uint16_t* dst_y,
|
uint16_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void MultiplyRow_16_Any_AVX2(const uint16_t* src_y,
|
void MultiplyRow_16_Any_AVX2(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_ptr,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void MultiplyRow_16_NEON(const uint16_t* src_y,
|
void MultiplyRow_16_NEON(const uint16_t* src_y,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void MultiplyRow_16_Any_NEON(const uint16_t* src_y,
|
void MultiplyRow_16_Any_NEON(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_ptr,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
@ -2335,16 +2339,16 @@ void DivideRow_16_AVX2(const uint16_t* src_y,
|
|||||||
uint16_t* dst_y,
|
uint16_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void DivideRow_16_Any_AVX2(const uint16_t* src_y,
|
void DivideRow_16_Any_AVX2(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_ptr,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void DivideRow_16_NEON(const uint16_t* src_y,
|
void DivideRow_16_NEON(const uint16_t* src_y,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void DivideRow_16_Any_NEON(const uint16_t* src_y,
|
void DivideRow_16_Any_NEON(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_ptr,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
@ -3719,15 +3723,15 @@ void I400ToARGBRow_MMI(const uint8_t* src_y,
|
|||||||
int width);
|
int width);
|
||||||
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* param,
|
||||||
int width);
|
int width);
|
||||||
void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* param,
|
||||||
int width);
|
int width);
|
||||||
void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
|
void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* param,
|
||||||
int width);
|
int width);
|
||||||
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -3739,11 +3743,11 @@ void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr,
|
|||||||
int width);
|
int width);
|
||||||
|
|
||||||
// ARGB preattenuated alpha blend.
|
// ARGB preattenuated alpha blend.
|
||||||
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
|
void ARGBBlendRow_SSSE3(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
void ARGBBlendRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3755,7 +3759,7 @@ void ARGBBlendRow_MMI(const uint8_t* src_argb0,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBBlendRow_C(const uint8_t* src_argb0,
|
void ARGBBlendRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3799,11 +3803,11 @@ void BlendPlaneRow_C(const uint8_t* src0,
|
|||||||
|
|
||||||
// ARGB multiply images. Same API as Blend, but these require
|
// ARGB multiply images. Same API as Blend, but these require
|
||||||
// pointer and width alignment for SSE2.
|
// pointer and width alignment for SSE2.
|
||||||
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3811,7 +3815,7 @@ void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf,
|
|||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3819,7 +3823,7 @@ void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf,
|
|||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3845,11 +3849,11 @@ void ARGBMultiplyRow_Any_MMI(const uint8_t* y_buf,
|
|||||||
int width);
|
int width);
|
||||||
|
|
||||||
// ARGB add images.
|
// ARGB add images.
|
||||||
void ARGBAddRow_C(const uint8_t* src_argb0,
|
void ARGBAddRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
void ARGBAddRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3857,7 +3861,7 @@ void ARGBAddRow_Any_SSE2(const uint8_t* y_buf,
|
|||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void ARGBAddRow_AVX2(const uint8_t* src_argb0,
|
void ARGBAddRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3865,7 +3869,7 @@ void ARGBAddRow_Any_AVX2(const uint8_t* y_buf,
|
|||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
void ARGBAddRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3892,11 +3896,11 @@ void ARGBAddRow_Any_MMI(const uint8_t* y_buf,
|
|||||||
|
|
||||||
// ARGB subtract images. Same API as Blend, but these require
|
// ARGB subtract images. Same API as Blend, but these require
|
||||||
// pointer and width alignment for SSE2.
|
// pointer and width alignment for SSE2.
|
||||||
void ARGBSubtractRow_C(const uint8_t* src_argb0,
|
void ARGBSubtractRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
|
void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3904,7 +3908,7 @@ void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf,
|
|||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
|
void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -3912,7 +3916,7 @@ void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf,
|
|||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
|
void ARGBSubtractRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -4119,9 +4123,9 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y,
|
void NV21ToYUV24Row_Any_NEON(const uint8_t* y_buf,
|
||||||
const uint8_t* src_vu,
|
const uint8_t* uv_buf,
|
||||||
uint8_t* dst_yuv24,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
|
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
|
||||||
const uint8_t* uv_buf,
|
const uint8_t* uv_buf,
|
||||||
@ -4323,7 +4327,7 @@ void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
|
|||||||
int width);
|
int width);
|
||||||
void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -4333,7 +4337,7 @@ void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
|
|||||||
int width);
|
int width);
|
||||||
void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
|
void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -4343,7 +4347,7 @@ void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
|
|||||||
int width);
|
int width);
|
||||||
void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -4444,7 +4448,7 @@ void UYVYToUV422Row_C(const uint8_t* src_uyvy,
|
|||||||
int width);
|
int width);
|
||||||
void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -4454,7 +4458,7 @@ void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr,
|
|||||||
int width);
|
int width);
|
||||||
void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr,
|
void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -4464,7 +4468,7 @@ void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr,
|
|||||||
int width);
|
int width);
|
||||||
void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr,
|
void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int src_stride_ptr,
|
int src_stride,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
@ -4501,29 +4505,29 @@ void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
|
|||||||
void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
|
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
|
||||||
void AYUVToUVRow_C(const uint8_t* src_ayuv,
|
void AYUVToUVRow_C(const uint8_t* src_ayuv,
|
||||||
int stride_ayuv,
|
int src_stride_ayuv,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int width);
|
int width);
|
||||||
void AYUVToVURow_C(const uint8_t* src_ayuv,
|
void AYUVToVURow_C(const uint8_t* src_ayuv,
|
||||||
int stride_ayuv,
|
int src_stride_ayuv,
|
||||||
uint8_t* dst_vu,
|
uint8_t* dst_vu,
|
||||||
int width);
|
int width);
|
||||||
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
|
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
|
||||||
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
|
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
|
||||||
int stride_ayuv,
|
int src_stride_ayuv,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int width);
|
int width);
|
||||||
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
|
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
|
||||||
int stride_ayuv,
|
int src_stride_ayuv,
|
||||||
uint8_t* dst_vu,
|
uint8_t* dst_vu,
|
||||||
int width);
|
int width);
|
||||||
void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
|
void AYUVToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv,
|
void AYUVToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int stride_ayuv,
|
int src_stride,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_vu,
|
||||||
int width);
|
int width);
|
||||||
void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv,
|
void AYUVToVURow_Any_NEON(const uint8_t* src_ptr,
|
||||||
int stride_ayuv,
|
int src_stride,
|
||||||
uint8_t* dst_vu,
|
uint8_t* dst_vu,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1784
|
#define LIBYUV_VERSION 1785
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -17,36 +17,6 @@ namespace libyuv {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// HammingDistance_C1: bit-by-bit Hamming distance, compiled only when
// ORIGINAL_OPT is non-zero. Every line of this function has an empty
// counterpart in the second column of the hunk, i.e. the change removes it.
//   src_a, src_b - the two byte buffers to compare.
//   count        - number of bytes read from each buffer.
// Returns the total number of bit positions in which the buffers differ.
#if ORIGINAL_OPT
|
|
||||||
uint32_t HammingDistance_C1(const uint8_t* src_a,
|
|
||||||
const uint8_t* src_b,
|
|
||||||
int count) {
|
|
||||||
uint32_t diff = 0u;
|
|
||||||
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < count; ++i) {
|
|
||||||
// x has a 1 in every bit position where the two bytes disagree.
int x = src_a[i] ^ src_b[i];
|
|
||||||
// Test each of the 8 bits of x in turn and count the ones that are set.
if (x & 1)
|
|
||||||
++diff;
|
|
||||||
if (x & 2)
|
|
||||||
++diff;
|
|
||||||
if (x & 4)
|
|
||||||
++diff;
|
|
||||||
if (x & 8)
|
|
||||||
++diff;
|
|
||||||
if (x & 16)
|
|
||||||
++diff;
|
|
||||||
if (x & 32)
|
|
||||||
++diff;
|
|
||||||
if (x & 64)
|
|
||||||
++diff;
|
|
||||||
if (x & 128)
|
|
||||||
++diff;
|
|
||||||
}
|
|
||||||
return diff;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Hakmem method for hamming distance.
|
// Hakmem method for hamming distance.
|
||||||
uint32_t HammingDistance_C(const uint8_t* src_a,
|
uint32_t HammingDistance_C(const uint8_t* src_a,
|
||||||
const uint8_t* src_b,
|
const uint8_t* src_b,
|
||||||
|
|||||||
@ -212,11 +212,23 @@ ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
|
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
|
||||||
ANY41PT(MergeARGB16To8Row_Any_AVX2, MergeARGB16To8Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
|
ANY41PT(MergeARGB16To8Row_Any_AVX2,
|
||||||
|
MergeARGB16To8Row_AVX2,
|
||||||
|
uint16_t,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
4,
|
||||||
|
15)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_MERGEARGB16TO8ROW_NEON
|
#ifdef HAS_MERGEARGB16TO8ROW_NEON
|
||||||
ANY41PT(MergeARGB16To8Row_Any_NEON, MergeARGB16To8Row_NEON, uint16_t, 2, uint8_t, 4, 7)
|
ANY41PT(MergeARGB16To8Row_Any_NEON,
|
||||||
|
MergeARGB16To8Row_NEON,
|
||||||
|
uint16_t,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
4,
|
||||||
|
7)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef ANY41PT
|
#undef ANY41PT
|
||||||
@ -487,7 +499,13 @@ ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
|
|||||||
|
|
||||||
#ifdef HAS_MERGEXR30ROW_NEON
|
#ifdef HAS_MERGEXR30ROW_NEON
|
||||||
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
|
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
|
||||||
ANY31PT(MergeXR30Row_10_Any_NEON, MergeXR30Row_10_NEON, uint16_t, 2, uint8_t, 4, 3)
|
ANY31PT(MergeXR30Row_10_Any_NEON,
|
||||||
|
MergeXR30Row_10_NEON,
|
||||||
|
uint16_t,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
4,
|
||||||
|
3)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_MERGEXR64ROW_AVX2
|
#ifdef HAS_MERGEXR64ROW_AVX2
|
||||||
@ -499,11 +517,23 @@ ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
|
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
|
||||||
ANY31PT(MergeXRGB16To8Row_Any_AVX2, MergeXRGB16To8Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
|
ANY31PT(MergeXRGB16To8Row_Any_AVX2,
|
||||||
|
MergeXRGB16To8Row_AVX2,
|
||||||
|
uint16_t,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
4,
|
||||||
|
15)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_MERGEXRGB16TO8ROW_NEON
|
#ifdef HAS_MERGEXRGB16TO8ROW_NEON
|
||||||
ANY31PT(MergeXRGB16To8Row_Any_NEON, MergeXRGB16To8Row_NEON, uint16_t, 2, uint8_t, 4, 7)
|
ANY31PT(MergeXRGB16To8Row_Any_NEON,
|
||||||
|
MergeXRGB16To8Row_NEON,
|
||||||
|
uint16_t,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
4,
|
||||||
|
7)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef ANY31PT
|
#undef ANY31PT
|
||||||
@ -1553,20 +1583,20 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
|
|||||||
#undef ANY11C
|
#undef ANY11C
|
||||||
|
|
||||||
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
||||||
// ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) defines a function NAMEANY that
// lets ANY_SIMD be used for any width:
//   - n = width & ~MASK pixels are handed straight to ANY_SIMD (skipped when
//     n is 0); r = width & MASK pixels remain.
//   - The r leftover pixels of the first row are copied to temp[0..], those of
//     the second row (src_ptr + stride) to temp[64..]; the first 128 bytes of
//     temp are zeroed beforehand (marked "for msan").
//   - ANY_SIMD is run once more on temp with a stride of 64 and MASK + 1
//     pixels, writing to temp + 128, and only r * BPP bytes of that result are
//     copied to dst_ptr + n * BPP.
// SBPP / BPP are the per-pixel byte counts used for the source / destination
// offsets. The two columns of this hunk differ only in the stride parameter
// name (src_stride_ptr in the first column, src_stride in the second).
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||||
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
|
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \
|
||||||
ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
|
int width, int source_y_fraction) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
|
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
|
||||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
int n = width & ~MASK; \
|
int n = width & ~MASK; \
|
||||||
if (n > 0) { \
|
if (n > 0) { \
|
||||||
ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
|
ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
|
||||||
} \
|
} \
|
||||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||||
memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
|
memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \
|
||||||
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
|
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
|
||||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_INTERPOLATEROW_AVX2
|
#ifdef HAS_INTERPOLATEROW_AVX2
|
||||||
@ -1844,17 +1874,17 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
|
|||||||
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
|
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
|
||||||
// 128 byte row allows for 32 avx ARGB pixels.
|
// 128 byte row allows for 32 avx ARGB pixels.
|
||||||
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
|
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
|
||||||
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \
|
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
|
||||||
uint8_t* dst_v, int width) { \
|
uint8_t* dst_v, int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[128 * 4]); \
|
SIMD_ALIGNED(uint8_t temp[128 * 4]); \
|
||||||
memset(temp, 0, 128 * 2); /* for msan */ \
|
memset(temp, 0, 128 * 2); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
int n = width & ~MASK; \
|
int n = width & ~MASK; \
|
||||||
if (n > 0) { \
|
if (n > 0) { \
|
||||||
ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
|
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
|
||||||
} \
|
} \
|
||||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||||
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
|
memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
|
||||||
SS(r, UVSHIFT) * BPP); \
|
SS(r, UVSHIFT) * BPP); \
|
||||||
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
|
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
|
||||||
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
|
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
|
||||||
@ -2001,17 +2031,17 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
|
|||||||
// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
|
// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
|
||||||
// 128 byte row allows for 32 avx ARGB pixels.
|
// 128 byte row allows for 32 avx ARGB pixels.
|
||||||
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
|
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
|
||||||
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu, \
|
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \
|
||||||
int width) { \
|
int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
|
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
|
||||||
memset(temp, 0, 128 * 2); /* for msan */ \
|
memset(temp, 0, 128 * 2); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
int n = width & ~MASK; \
|
int n = width & ~MASK; \
|
||||||
if (n > 0) { \
|
if (n > 0) { \
|
||||||
ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n); \
|
ANY_SIMD(src_ptr, src_stride, dst_vu, n); \
|
||||||
} \
|
} \
|
||||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||||
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
|
memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
|
||||||
SS(r, UVSHIFT) * BPP); \
|
SS(r, UVSHIFT) * BPP); \
|
||||||
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
|
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
|
||||||
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
|
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
|
||||||
|
|||||||
@ -553,80 +553,80 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
|
|||||||
// Intel version mimic SSE/AVX which does 2 pavgb
|
// Intel version mimic SSE/AVX which does 2 pavgb
|
||||||
#if LIBYUV_ARGBTOUV_PAVGB
|
#if LIBYUV_ARGBTOUV_PAVGB
|
||||||
|
|
||||||
// MAKEROWY(NAME, R, G, B, BPP), variant used when LIBYUV_ARGBTOUV_PAVGB is
// non-zero. Expands to two C row functions for one packed RGB layout:
//   R, G, B - byte index of each channel inside a pixel.
//   BPP     - bytes per pixel (the amount the source pointer advances).
// NAME##ToYRow_C:  writes one RGBToY() result per source pixel.
// NAME##ToUVRow_C: reads two rows (the source row and the row src_stride_rgb
//   bytes after it) and writes one U and one V byte per pair of columns. Each
//   channel of a 2x2 block is reduced with nested AVGB calls: first the two
//   vertically adjacent samples of each column, then the two column results.
//   When width is odd, the last column is reduced with a single vertical AVGB.
// AVGB, RGBToY, RGBToU and RGBToV are defined outside this hunk.
// The two columns of this hunk differ only in the source pointer name
// (src_argb0 / src_rgb0 in the first column, src_rgb in the second).
#define MAKEROWY(NAME, R, G, B, BPP) \
|
#define MAKEROWY(NAME, R, G, B, BPP) \
|
||||||
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width; ++x) { \
|
for (x = 0; x < width; ++x) { \
|
||||||
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
|
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
|
||||||
src_argb0 += BPP; \
|
src_rgb += BPP; \
|
||||||
dst_y += 1; \
|
dst_y += 1; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
|
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||||
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width - 1; x += 2) { \
|
for (x = 0; x < width - 1; x += 2) { \
|
||||||
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
|
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
|
||||||
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
|
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
|
||||||
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
|
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
|
||||||
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
|
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
|
||||||
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
|
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
|
||||||
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
|
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
|
||||||
dst_u[0] = RGBToU(ar, ag, ab); \
|
dst_u[0] = RGBToU(ar, ag, ab); \
|
||||||
dst_v[0] = RGBToV(ar, ag, ab); \
|
dst_v[0] = RGBToV(ar, ag, ab); \
|
||||||
src_rgb0 += BPP * 2; \
|
src_rgb += BPP * 2; \
|
||||||
src_rgb1 += BPP * 2; \
|
src_rgb1 += BPP * 2; \
|
||||||
dst_u += 1; \
|
dst_u += 1; \
|
||||||
dst_v += 1; \
|
dst_v += 1; \
|
||||||
} \
|
} \
|
||||||
if (width & 1) { \
|
if (width & 1) { \
|
||||||
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
|
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
|
||||||
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
|
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
|
||||||
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
|
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
|
||||||
dst_u[0] = RGBToU(ar, ag, ab); \
|
dst_u[0] = RGBToU(ar, ag, ab); \
|
||||||
dst_v[0] = RGBToV(ar, ag, ab); \
|
dst_v[0] = RGBToV(ar, ag, ab); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
||||||
// MAKEROWY(NAME, R, G, B, BPP), variant used when LIBYUV_ARGBTOUV_PAVGB is
// zero or undefined. Expands to two C row functions for one packed RGB layout:
//   R, G, B - byte index of each channel inside a pixel.
//   BPP     - bytes per pixel (the amount the source pointer advances).
// NAME##ToYRow_C:  writes one RGBToY() result per source pixel.
// NAME##ToUVRow_C: reads two rows (the source row and the row src_stride_rgb
//   bytes after it) and writes one U and one V byte per pair of columns. For
//   each channel the four samples of a 2x2 block are summed, 1 is added and
//   the total is shifted right by 1, so the value handed to RGB2xToU /
//   RGB2xToV is about twice the block average (hence the uint16_t locals).
//   When width is odd, the last column passes the plain sum of its two
//   vertically adjacent samples, which is on the same doubled scale.
// RGBToY, RGB2xToU and RGB2xToV are defined outside this hunk.
// The two columns of this hunk differ only in the source pointer name
// (src_argb0 / src_rgb0 in the first column, src_rgb in the second).
#define MAKEROWY(NAME, R, G, B, BPP) \
|
#define MAKEROWY(NAME, R, G, B, BPP) \
|
||||||
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width; ++x) { \
|
for (x = 0; x < width; ++x) { \
|
||||||
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
|
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
|
||||||
src_argb0 += BPP; \
|
src_rgb += BPP; \
|
||||||
dst_y += 1; \
|
dst_y += 1; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
|
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||||
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width - 1; x += 2) { \
|
for (x = 0; x < width - 1; x += 2) { \
|
||||||
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
|
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
|
||||||
src_rgb1[B + BPP] + 1) >> \
|
src_rgb1[B + BPP] + 1) >> \
|
||||||
1; \
|
1; \
|
||||||
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
|
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
|
||||||
src_rgb1[G + BPP] + 1) >> \
|
src_rgb1[G + BPP] + 1) >> \
|
||||||
1; \
|
1; \
|
||||||
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
|
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
|
||||||
src_rgb1[R + BPP] + 1) >> \
|
src_rgb1[R + BPP] + 1) >> \
|
||||||
1; \
|
1; \
|
||||||
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
||||||
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
||||||
src_rgb0 += BPP * 2; \
|
src_rgb += BPP * 2; \
|
||||||
src_rgb1 += BPP * 2; \
|
src_rgb1 += BPP * 2; \
|
||||||
dst_u += 1; \
|
dst_u += 1; \
|
||||||
dst_v += 1; \
|
dst_v += 1; \
|
||||||
} \
|
} \
|
||||||
if (width & 1) { \
|
if (width & 1) { \
|
||||||
uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
|
uint16_t ab = src_rgb[B] + src_rgb1[B]; \
|
||||||
uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
|
uint16_t ag = src_rgb[G] + src_rgb1[G]; \
|
||||||
uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
|
uint16_t ar = src_rgb[R] + src_rgb1[R]; \
|
||||||
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
||||||
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -694,80 +694,80 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
|
|||||||
// ARGBToYJ_C and ARGBToUVJ_C
|
// ARGBToYJ_C and ARGBToUVJ_C
|
||||||
// Intel version mimic SSE/AVX which does 2 pavgb
|
// Intel version mimic SSE/AVX which does 2 pavgb
|
||||||
#if LIBYUV_ARGBTOUV_PAVGB
|
#if LIBYUV_ARGBTOUV_PAVGB
|
||||||
// MAKEROWYJ(NAME, R, G, B, BPP), variant used when LIBYUV_ARGBTOUV_PAVGB is
// non-zero. Same structure as MAKEROWY but built on the "J" helpers:
//   R, G, B - byte index of each channel inside a pixel.
//   BPP     - bytes per pixel (the amount the source pointer advances).
// NAME##ToYJRow_C:  writes one RGBToYJ() result per source pixel.
// NAME##ToUVJRow_C: reads two rows (the source row and the row src_stride_rgb
//   bytes after it) and writes one U and one V byte per pair of columns. Each
//   channel of a 2x2 block is reduced with nested AVGB calls: first the two
//   vertically adjacent samples of each column, then the two column results.
//   When width is odd, the last column is reduced with a single vertical AVGB.
// AVGB, RGBToYJ, RGBToUJ and RGBToVJ are defined outside this hunk.
// The two columns of this hunk differ only in the source pointer name
// (src_argb0 / src_rgb0 in the first column, src_rgb in the second).
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||||
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width; ++x) { \
|
for (x = 0; x < width; ++x) { \
|
||||||
dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
|
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
|
||||||
src_argb0 += BPP; \
|
src_rgb += BPP; \
|
||||||
dst_y += 1; \
|
dst_y += 1; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
|
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||||
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width - 1; x += 2) { \
|
for (x = 0; x < width - 1; x += 2) { \
|
||||||
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
|
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
|
||||||
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
|
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
|
||||||
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
|
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
|
||||||
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
|
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
|
||||||
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
|
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
|
||||||
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
|
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
|
||||||
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
||||||
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
||||||
src_rgb0 += BPP * 2; \
|
src_rgb += BPP * 2; \
|
||||||
src_rgb1 += BPP * 2; \
|
src_rgb1 += BPP * 2; \
|
||||||
dst_u += 1; \
|
dst_u += 1; \
|
||||||
dst_v += 1; \
|
dst_v += 1; \
|
||||||
} \
|
} \
|
||||||
if (width & 1) { \
|
if (width & 1) { \
|
||||||
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
|
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
|
||||||
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
|
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
|
||||||
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
|
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
|
||||||
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
||||||
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
||||||
// MAKEROWYJ(NAME, R, G, B, BPP), variant used when LIBYUV_ARGBTOUV_PAVGB is
// zero or undefined. Same structure as MAKEROWY but built on the "J" helpers:
//   R, G, B - byte index of each channel inside a pixel.
//   BPP     - bytes per pixel (the amount the source pointer advances).
// NAME##ToYJRow_C:  writes one RGBToYJ() result per source pixel.
// NAME##ToUVJRow_C: reads two rows (the source row and the row src_stride_rgb
//   bytes after it) and writes one U and one V byte per pair of columns. For
//   each channel the four samples of a 2x2 block are summed, 1 is added and
//   the total is shifted right by 1, so the value handed to RGB2xToUJ /
//   RGB2xToVJ is about twice the block average (hence the uint16_t locals).
//   When width is odd, the last column passes the plain sum of its two
//   vertically adjacent samples, which is on the same doubled scale.
// RGBToYJ, RGB2xToUJ and RGB2xToVJ are defined outside this hunk.
// The two columns of this hunk differ only in the source pointer name
// (src_argb0 / src_rgb0 in the first column, src_rgb in the second).
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||||
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width; ++x) { \
|
for (x = 0; x < width; ++x) { \
|
||||||
dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
|
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
|
||||||
src_argb0 += BPP; \
|
src_rgb += BPP; \
|
||||||
dst_y += 1; \
|
dst_y += 1; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
|
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||||
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
|
||||||
int x; \
|
int x; \
|
||||||
for (x = 0; x < width - 1; x += 2) { \
|
for (x = 0; x < width - 1; x += 2) { \
|
||||||
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
|
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
|
||||||
src_rgb1[B + BPP] + 1) >> \
|
src_rgb1[B + BPP] + 1) >> \
|
||||||
1; \
|
1; \
|
||||||
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
|
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
|
||||||
src_rgb1[G + BPP] + 1) >> \
|
src_rgb1[G + BPP] + 1) >> \
|
||||||
1; \
|
1; \
|
||||||
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
|
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
|
||||||
src_rgb1[R + BPP] + 1) >> \
|
src_rgb1[R + BPP] + 1) >> \
|
||||||
1; \
|
1; \
|
||||||
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
||||||
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
||||||
src_rgb0 += BPP * 2; \
|
src_rgb += BPP * 2; \
|
||||||
src_rgb1 += BPP * 2; \
|
src_rgb1 += BPP * 2; \
|
||||||
dst_u += 1; \
|
dst_u += 1; \
|
||||||
dst_v += 1; \
|
dst_v += 1; \
|
||||||
} \
|
} \
|
||||||
if (width & 1) { \
|
if (width & 1) { \
|
||||||
uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
|
uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
|
||||||
uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
|
uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
|
||||||
uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
|
uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
|
||||||
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
||||||
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -1237,16 +1237,16 @@ void ARGBShadeRow_C(const uint8_t* src_argb,
|
|||||||
#define REPEAT8(v) (v) | ((v) << 8)
|
#define REPEAT8(v) (v) | ((v) << 8)
|
||||||
#define SHADE(f, v) v* f >> 16
|
#define SHADE(f, v) v* f >> 16
|
||||||
|
|
||||||
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < width; ++i) {
|
for (i = 0; i < width; ++i) {
|
||||||
const uint32_t b = REPEAT8(src_argb0[0]);
|
const uint32_t b = REPEAT8(src_argb[0]);
|
||||||
const uint32_t g = REPEAT8(src_argb0[1]);
|
const uint32_t g = REPEAT8(src_argb[1]);
|
||||||
const uint32_t r = REPEAT8(src_argb0[2]);
|
const uint32_t r = REPEAT8(src_argb[2]);
|
||||||
const uint32_t a = REPEAT8(src_argb0[3]);
|
const uint32_t a = REPEAT8(src_argb[3]);
|
||||||
const uint32_t b_scale = src_argb1[0];
|
const uint32_t b_scale = src_argb1[0];
|
||||||
const uint32_t g_scale = src_argb1[1];
|
const uint32_t g_scale = src_argb1[1];
|
||||||
const uint32_t r_scale = src_argb1[2];
|
const uint32_t r_scale = src_argb1[2];
|
||||||
@ -1255,7 +1255,7 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
|
|||||||
dst_argb[1] = SHADE(g, g_scale);
|
dst_argb[1] = SHADE(g, g_scale);
|
||||||
dst_argb[2] = SHADE(r, r_scale);
|
dst_argb[2] = SHADE(r, r_scale);
|
||||||
dst_argb[3] = SHADE(a, a_scale);
|
dst_argb[3] = SHADE(a, a_scale);
|
||||||
src_argb0 += 4;
|
src_argb += 4;
|
||||||
src_argb1 += 4;
|
src_argb1 += 4;
|
||||||
dst_argb += 4;
|
dst_argb += 4;
|
||||||
}
|
}
|
||||||
@ -1265,16 +1265,16 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#define SHADE(f, v) clamp255(v + f)
|
#define SHADE(f, v) clamp255(v + f)
|
||||||
|
|
||||||
void ARGBAddRow_C(const uint8_t* src_argb0,
|
void ARGBAddRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < width; ++i) {
|
for (i = 0; i < width; ++i) {
|
||||||
const int b = src_argb0[0];
|
const int b = src_argb[0];
|
||||||
const int g = src_argb0[1];
|
const int g = src_argb[1];
|
||||||
const int r = src_argb0[2];
|
const int r = src_argb[2];
|
||||||
const int a = src_argb0[3];
|
const int a = src_argb[3];
|
||||||
const int b_add = src_argb1[0];
|
const int b_add = src_argb1[0];
|
||||||
const int g_add = src_argb1[1];
|
const int g_add = src_argb1[1];
|
||||||
const int r_add = src_argb1[2];
|
const int r_add = src_argb1[2];
|
||||||
@ -1283,7 +1283,7 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
|
|||||||
dst_argb[1] = SHADE(g, g_add);
|
dst_argb[1] = SHADE(g, g_add);
|
||||||
dst_argb[2] = SHADE(r, r_add);
|
dst_argb[2] = SHADE(r, r_add);
|
||||||
dst_argb[3] = SHADE(a, a_add);
|
dst_argb[3] = SHADE(a, a_add);
|
||||||
src_argb0 += 4;
|
src_argb += 4;
|
||||||
src_argb1 += 4;
|
src_argb1 += 4;
|
||||||
dst_argb += 4;
|
dst_argb += 4;
|
||||||
}
|
}
|
||||||
@ -1292,16 +1292,16 @@ void ARGBAddRow_C(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#define SHADE(f, v) clamp0(f - v)
|
#define SHADE(f, v) clamp0(f - v)
|
||||||
|
|
||||||
void ARGBSubtractRow_C(const uint8_t* src_argb0,
|
void ARGBSubtractRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < width; ++i) {
|
for (i = 0; i < width; ++i) {
|
||||||
const int b = src_argb0[0];
|
const int b = src_argb[0];
|
||||||
const int g = src_argb0[1];
|
const int g = src_argb[1];
|
||||||
const int r = src_argb0[2];
|
const int r = src_argb[2];
|
||||||
const int a = src_argb0[3];
|
const int a = src_argb[3];
|
||||||
const int b_sub = src_argb1[0];
|
const int b_sub = src_argb1[0];
|
||||||
const int g_sub = src_argb1[1];
|
const int g_sub = src_argb1[1];
|
||||||
const int r_sub = src_argb1[2];
|
const int r_sub = src_argb1[2];
|
||||||
@ -1310,7 +1310,7 @@ void ARGBSubtractRow_C(const uint8_t* src_argb0,
|
|||||||
dst_argb[1] = SHADE(g, g_sub);
|
dst_argb[1] = SHADE(g, g_sub);
|
||||||
dst_argb[2] = SHADE(r, r_sub);
|
dst_argb[2] = SHADE(r, r_sub);
|
||||||
dst_argb[3] = SHADE(a, a_sub);
|
dst_argb[3] = SHADE(a, a_sub);
|
||||||
src_argb0 += 4;
|
src_argb += 4;
|
||||||
src_argb1 += 4;
|
src_argb1 += 4;
|
||||||
dst_argb += 4;
|
dst_argb += 4;
|
||||||
}
|
}
|
||||||
@ -1486,7 +1486,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
|||||||
// KR = 0.299; KB = 0.114
|
// KR = 0.299; KB = 0.114
|
||||||
|
|
||||||
// U and V contributions to R,G,B.
|
// U and V contributions to R,G,B.
|
||||||
#if defined(LIBYUV_UNLIMITED_DATA)
|
#if LIBYUV_UNLIMITED_DATA
|
||||||
#define UB 129 /* round(2.018 * 64) */
|
#define UB 129 /* round(2.018 * 64) */
|
||||||
#else
|
#else
|
||||||
#define UB 128 /* max(128, round(2.018 * 64)) */
|
#define UB 128 /* max(128, round(2.018 * 64)) */
|
||||||
@ -1540,7 +1540,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
|
|||||||
// KR = 0.2126, KB = 0.0722
|
// KR = 0.2126, KB = 0.0722
|
||||||
|
|
||||||
// U and V contributions to R,G,B.
|
// U and V contributions to R,G,B.
|
||||||
#if defined(LIBYUV_UNLIMITED_DATA)
|
#if LIBYUV_UNLIMITED_DATA
|
||||||
#define UB 135 /* round(2.112 * 64) */
|
#define UB 135 /* round(2.112 * 64) */
|
||||||
#else
|
#else
|
||||||
#define UB 128 /* max(128, round(2.112 * 64)) */
|
#define UB 128 /* max(128, round(2.112 * 64)) */
|
||||||
@ -1594,7 +1594,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
|
|||||||
// KR = 0.2627; KB = 0.0593
|
// KR = 0.2627; KB = 0.0593
|
||||||
|
|
||||||
// U and V contributions to R,G,B.
|
// U and V contributions to R,G,B.
|
||||||
#if defined(LIBYUV_UNLIMITED_DATA)
|
#if LIBYUV_UNLIMITED_DATA
|
||||||
#define UB 137 /* round(2.142 * 64) */
|
#define UB 137 /* round(2.142 * 64) */
|
||||||
#else
|
#else
|
||||||
#define UB 128 /* max(128, round(2.142 * 64)) */
|
#define UB 128 /* max(128, round(2.142 * 64)) */
|
||||||
@ -1646,7 +1646,7 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
|
|||||||
|
|
||||||
#undef MAKEYUVCONSTANTS
|
#undef MAKEYUVCONSTANTS
|
||||||
|
|
||||||
#if defined(LIBYUV_UNLIMITED_DATA)
|
#if LIBYUV_UNLIMITED_DATA
|
||||||
|
|
||||||
// C reference code that mimics the YUV assembly.
|
// C reference code that mimics the YUV assembly.
|
||||||
// Reads 8 bit YUV and leaves result as 16 bit.
|
// Reads 8 bit YUV and leaves result as 16 bit.
|
||||||
@ -3347,19 +3347,19 @@ void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
|
|||||||
|
|
||||||
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
|
#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
|
||||||
|
|
||||||
// Blend src_argb0 over src_argb1 and store to dst_argb.
|
// Blend src_argb over src_argb1 and store to dst_argb.
|
||||||
// dst_argb may be src_argb0 or src_argb1.
|
// dst_argb may be src_argb or src_argb1.
|
||||||
// This code mimics the SSSE3 version for better testability.
|
// This code mimics the SSSE3 version for better testability.
|
||||||
void ARGBBlendRow_C(const uint8_t* src_argb0,
|
void ARGBBlendRow_C(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
for (x = 0; x < width - 1; x += 2) {
|
for (x = 0; x < width - 1; x += 2) {
|
||||||
uint32_t fb = src_argb0[0];
|
uint32_t fb = src_argb[0];
|
||||||
uint32_t fg = src_argb0[1];
|
uint32_t fg = src_argb[1];
|
||||||
uint32_t fr = src_argb0[2];
|
uint32_t fr = src_argb[2];
|
||||||
uint32_t a = src_argb0[3];
|
uint32_t a = src_argb[3];
|
||||||
uint32_t bb = src_argb1[0];
|
uint32_t bb = src_argb1[0];
|
||||||
uint32_t bg = src_argb1[1];
|
uint32_t bg = src_argb1[1];
|
||||||
uint32_t br = src_argb1[2];
|
uint32_t br = src_argb1[2];
|
||||||
@ -3368,10 +3368,10 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
|
|||||||
dst_argb[2] = BLEND(fr, br, a);
|
dst_argb[2] = BLEND(fr, br, a);
|
||||||
dst_argb[3] = 255u;
|
dst_argb[3] = 255u;
|
||||||
|
|
||||||
fb = src_argb0[4 + 0];
|
fb = src_argb[4 + 0];
|
||||||
fg = src_argb0[4 + 1];
|
fg = src_argb[4 + 1];
|
||||||
fr = src_argb0[4 + 2];
|
fr = src_argb[4 + 2];
|
||||||
a = src_argb0[4 + 3];
|
a = src_argb[4 + 3];
|
||||||
bb = src_argb1[4 + 0];
|
bb = src_argb1[4 + 0];
|
||||||
bg = src_argb1[4 + 1];
|
bg = src_argb1[4 + 1];
|
||||||
br = src_argb1[4 + 2];
|
br = src_argb1[4 + 2];
|
||||||
@ -3379,16 +3379,16 @@ void ARGBBlendRow_C(const uint8_t* src_argb0,
|
|||||||
dst_argb[4 + 1] = BLEND(fg, bg, a);
|
dst_argb[4 + 1] = BLEND(fg, bg, a);
|
||||||
dst_argb[4 + 2] = BLEND(fr, br, a);
|
dst_argb[4 + 2] = BLEND(fr, br, a);
|
||||||
dst_argb[4 + 3] = 255u;
|
dst_argb[4 + 3] = 255u;
|
||||||
src_argb0 += 8;
|
src_argb += 8;
|
||||||
src_argb1 += 8;
|
src_argb1 += 8;
|
||||||
dst_argb += 8;
|
dst_argb += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (width & 1) {
|
if (width & 1) {
|
||||||
uint32_t fb = src_argb0[0];
|
uint32_t fb = src_argb[0];
|
||||||
uint32_t fg = src_argb0[1];
|
uint32_t fg = src_argb[1];
|
||||||
uint32_t fr = src_argb0[2];
|
uint32_t fr = src_argb[2];
|
||||||
uint32_t a = src_argb0[3];
|
uint32_t a = src_argb[3];
|
||||||
uint32_t bb = src_argb1[0];
|
uint32_t bb = src_argb1[0];
|
||||||
uint32_t bg = src_argb1[1];
|
uint32_t bg = src_argb1[1];
|
||||||
uint32_t br = src_argb1[2];
|
uint32_t br = src_argb1[2];
|
||||||
|
|||||||
@ -1160,7 +1160,7 @@ void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
|||||||
: "memory", "cc", "xmm0", "xmm1");
|
: "memory", "cc", "xmm0", "xmm1");
|
||||||
}
|
}
|
||||||
|
|
||||||
void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
@ -1178,7 +1178,7 @@ void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
|||||||
"lea 0x10(%1),%1 \n"
|
"lea 0x10(%1),%1 \n"
|
||||||
"sub $0x4,%2 \n"
|
"sub $0x4,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_ar64), // %0
|
: "+r"(src_ab64), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleARGBToABGR) // %3
|
: "m"(kShuffleARGBToABGR) // %3
|
||||||
@ -1267,7 +1267,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_AB64TOARGBROW_AVX2
|
#ifdef HAS_AB64TOARGBROW_AVX2
|
||||||
void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
|
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
@ -1286,7 +1286,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
|
|||||||
"lea 0x20(%1),%1 \n"
|
"lea 0x20(%1),%1 \n"
|
||||||
"sub $0x8,%2 \n"
|
"sub $0x8,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_ar64), // %0
|
: "+r"(src_ab64), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleARGBToABGR) // %3
|
: "m"(kShuffleARGBToABGR) // %3
|
||||||
@ -1506,7 +1506,7 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
|||||||
#endif // HAS_RGBATOYJROW_AVX2
|
#endif // HAS_RGBATOYJROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||||
void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
|
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1558,7 +1558,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
"lea 0x8(%1),%1 \n"
|
"lea 0x8(%1),%1 \n"
|
||||||
"sub $0x10,%3 \n"
|
"sub $0x10,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -1575,7 +1575,7 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
static const lvec8 kShufARGBToUV_AVX = {
|
static const lvec8 kShufARGBToUV_AVX = {
|
||||||
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
||||||
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
|
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
|
||||||
void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
|
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1623,7 +1623,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
|
|||||||
"sub $0x20,%3 \n"
|
"sub $0x20,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -1638,7 +1638,7 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
|
|||||||
#endif // HAS_ARGBTOUVROW_AVX2
|
#endif // HAS_ARGBTOUVROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ABGRTOUVROW_AVX2
|
#ifdef HAS_ABGRTOUVROW_AVX2
|
||||||
void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
|
void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
|
||||||
int src_stride_abgr,
|
int src_stride_abgr,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1686,7 +1686,7 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
|
|||||||
"sub $0x20,%3 \n"
|
"sub $0x20,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_abgr0), // %0
|
: "+r"(src_abgr), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -1701,7 +1701,7 @@ void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
|
|||||||
#endif // HAS_ABGRTOUVROW_AVX2
|
#endif // HAS_ABGRTOUVROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVJROW_AVX2
|
#ifdef HAS_ARGBTOUVJROW_AVX2
|
||||||
void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1750,7 +1750,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
|||||||
"sub $0x20,%3 \n"
|
"sub $0x20,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -1765,7 +1765,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
|||||||
#endif // HAS_ARGBTOUVJROW_AVX2
|
#endif // HAS_ARGBTOUVJROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVJROW_SSSE3
|
#ifdef HAS_ARGBTOUVJROW_SSSE3
|
||||||
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
|
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1818,7 +1818,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
"lea 0x8(%1),%1 \n"
|
"lea 0x8(%1),%1 \n"
|
||||||
"sub $0x10,%3 \n"
|
"sub $0x10,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -1905,7 +1905,7 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
|||||||
"xmm7");
|
"xmm7");
|
||||||
}
|
}
|
||||||
|
|
||||||
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
|
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
|
||||||
int src_stride_bgra,
|
int src_stride_bgra,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1957,7 +1957,7 @@ void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
|
|||||||
"lea 0x8(%1),%1 \n"
|
"lea 0x8(%1),%1 \n"
|
||||||
"sub $0x10,%3 \n"
|
"sub $0x10,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_bgra0), // %0
|
: "+r"(src_bgra), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -2002,7 +2002,7 @@ void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
|||||||
"xmm7");
|
"xmm7");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
|
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
|
||||||
int src_stride_abgr,
|
int src_stride_abgr,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -2054,7 +2054,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
|
|||||||
"lea 0x8(%1),%1 \n"
|
"lea 0x8(%1),%1 \n"
|
||||||
"sub $0x10,%3 \n"
|
"sub $0x10,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_abgr0), // %0
|
: "+r"(src_abgr), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -2065,7 +2065,7 @@ void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
|
|||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
|
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||||
int src_stride_rgba,
|
int src_stride_rgba,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -2117,7 +2117,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
|
|||||||
"lea 0x8(%1),%1 \n"
|
"lea 0x8(%1),%1 \n"
|
||||||
"sub $0x10,%3 \n"
|
"sub $0x10,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_rgba0), // %0
|
: "+r"(src_rgba), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
@ -5741,7 +5741,7 @@ void MergeXR30Row_AVX2(const uint16_t* src_r,
|
|||||||
#if defined(__i386__)
|
#if defined(__i386__)
|
||||||
: "m"(shift) // %5
|
: "m"(shift) // %5
|
||||||
#else
|
#else
|
||||||
: "rm"(shift) // %5
|
: "rm"(shift) // %5
|
||||||
#endif
|
#endif
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
@ -5813,9 +5813,9 @@ void MergeAR64Row_AVX2(const uint16_t* src_r,
|
|||||||
: "m"(shift), // %6
|
: "m"(shift), // %6
|
||||||
"m"(mask), // %7
|
"m"(mask), // %7
|
||||||
#else
|
#else
|
||||||
"+rm"(width) // %5
|
"+rm"(width) // %5
|
||||||
: "rm"(shift), // %6
|
: "rm"(shift), // %6
|
||||||
"rm"(mask), // %7
|
"rm"(mask), // %7
|
||||||
#endif
|
#endif
|
||||||
"m"(MergeAR64Permute) // %8
|
"m"(MergeAR64Permute) // %8
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
@ -5882,8 +5882,8 @@ void MergeXR64Row_AVX2(const uint16_t* src_r,
|
|||||||
: "m"(shift), // %5
|
: "m"(shift), // %5
|
||||||
"m"(mask), // %6
|
"m"(mask), // %6
|
||||||
#else
|
#else
|
||||||
: "rm"(shift), // %5
|
: "rm"(shift), // %5
|
||||||
"rm"(mask), // %6
|
"rm"(mask), // %6
|
||||||
#endif
|
#endif
|
||||||
"m"(MergeAR64Permute) // %7
|
"m"(MergeAR64Permute) // %7
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
@ -5944,8 +5944,8 @@ void MergeARGB16To8Row_AVX2(const uint16_t* src_r,
|
|||||||
"+m"(width) // %5
|
"+m"(width) // %5
|
||||||
: "m"(shift), // %6
|
: "m"(shift), // %6
|
||||||
#else
|
#else
|
||||||
"+rm"(width) // %5
|
"+rm"(width) // %5
|
||||||
: "rm"(shift), // %6
|
: "rm"(shift), // %6
|
||||||
#endif
|
#endif
|
||||||
"m"(MergeARGB16To8Shuffle) // %7
|
"m"(MergeARGB16To8Shuffle) // %7
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||||
@ -6000,7 +6000,7 @@ void MergeXRGB16To8Row_AVX2(const uint16_t* src_r,
|
|||||||
#if defined(__i386__)
|
#if defined(__i386__)
|
||||||
: "m"(shift), // %5
|
: "m"(shift), // %5
|
||||||
#else
|
#else
|
||||||
: "rm"(shift), // %5
|
: "rm"(shift), // %5
|
||||||
#endif
|
#endif
|
||||||
"m"(MergeARGB16To8Shuffle) // %6
|
"m"(MergeARGB16To8Shuffle) // %6
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||||
@ -6732,7 +6732,7 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
|
|||||||
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
|
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
|
||||||
|
|
||||||
// Blend 8 pixels at a time
|
// Blend 8 pixels at a time
|
||||||
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
|
void ARGBBlendRow_SSSE3(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -6803,7 +6803,7 @@ void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
"sub $0x1,%3 \n"
|
"sub $0x1,%3 \n"
|
||||||
"jge 91b \n"
|
"jge 91b \n"
|
||||||
"99: \n"
|
"99: \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -7405,7 +7405,7 @@ void ARGBShadeRow_SSE2(const uint8_t* src_argb,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBMULTIPLYROW_SSE2
|
#ifdef HAS_ARGBMULTIPLYROW_SSE2
|
||||||
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
|
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
|
||||||
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -7433,7 +7433,7 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
|||||||
"lea 0x10(%2),%2 \n"
|
"lea 0x10(%2),%2 \n"
|
||||||
"sub $0x4,%3 \n"
|
"sub $0x4,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -7444,7 +7444,7 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBMULTIPLYROW_AVX2
|
#ifdef HAS_ARGBMULTIPLYROW_AVX2
|
||||||
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -7471,7 +7471,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
|||||||
"sub $0x8,%3 \n"
|
"sub $0x8,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -7482,7 +7482,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBADDROW_SSE2
|
#ifdef HAS_ARGBADDROW_SSE2
|
||||||
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
|
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
|
||||||
void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
void ARGBAddRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -7499,7 +7499,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
|||||||
"lea 0x10(%2),%2 \n"
|
"lea 0x10(%2),%2 \n"
|
||||||
"sub $0x4,%3 \n"
|
"sub $0x4,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -7510,7 +7510,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBADDROW_AVX2
|
#ifdef HAS_ARGBADDROW_AVX2
|
||||||
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
void ARGBAddRow_AVX2(const uint8_t* src_argb0,
|
void ARGBAddRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -7527,7 +7527,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb0,
|
|||||||
"sub $0x8,%3 \n"
|
"sub $0x8,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -7538,7 +7538,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBSUBTRACTROW_SSE2
|
#ifdef HAS_ARGBSUBTRACTROW_SSE2
|
||||||
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
|
// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
|
||||||
void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
|
void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -7555,7 +7555,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
|
|||||||
"lea 0x10(%2),%2 \n"
|
"lea 0x10(%2),%2 \n"
|
||||||
"sub $0x4,%3 \n"
|
"sub $0x4,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -7566,7 +7566,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBSUBTRACTROW_AVX2
|
#ifdef HAS_ARGBSUBTRACTROW_AVX2
|
||||||
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
||||||
void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
|
void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -7583,7 +7583,7 @@ void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
|
|||||||
"sub $0x8,%3 \n"
|
"sub $0x8,%3 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -781,7 +781,7 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void ARGBToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
|
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
|
||||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
||||||
@ -792,10 +792,10 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
|
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48);
|
||||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||||
vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||||
@ -822,18 +822,18 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
|
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
|
||||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
|
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 64;
|
src_argb += 64;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVRow_MSA(const uint8_t* src_argb0,
|
void ARGBToUVRow_MSA(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* src_argb0_next = src_argb0 + src_stride_argb;
|
const uint8_t* src_argb_next = src_argb + src_stride_argb;
|
||||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
||||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
|
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
|
||||||
@ -847,14 +847,14 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
|
|||||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||||
|
|
||||||
for (x = 0; x < width; x += 32) {
|
for (x = 0; x < width; x += 32) {
|
||||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
|
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48);
|
||||||
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
|
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb, 64);
|
||||||
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
|
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb, 80);
|
||||||
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
|
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb, 96);
|
||||||
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
|
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb, 112);
|
||||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||||
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
||||||
@ -875,14 +875,14 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
|
|||||||
reg3 = __msa_hadd_u_h(vec5, vec5);
|
reg3 = __msa_hadd_u_h(vec5, vec5);
|
||||||
reg4 = __msa_hadd_u_h(vec0, vec0);
|
reg4 = __msa_hadd_u_h(vec0, vec0);
|
||||||
reg5 = __msa_hadd_u_h(vec1, vec1);
|
reg5 = __msa_hadd_u_h(vec1, vec1);
|
||||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
|
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
|
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
|
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
|
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 48);
|
||||||
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
|
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 64);
|
||||||
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
|
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 80);
|
||||||
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
|
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 96);
|
||||||
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
|
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 112);
|
||||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||||
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
||||||
@ -945,8 +945,8 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
|
|||||||
dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
|
dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
|
||||||
ST_UB(dst0, dst_u);
|
ST_UB(dst0, dst_u);
|
||||||
ST_UB(dst1, dst_v);
|
ST_UB(dst1, dst_v);
|
||||||
src_argb0 += 128;
|
src_argb += 128;
|
||||||
src_argb0_next += 128;
|
src_argb_next += 128;
|
||||||
dst_u += 16;
|
dst_u += 16;
|
||||||
dst_v += 16;
|
dst_v += 16;
|
||||||
}
|
}
|
||||||
@ -1173,7 +1173,7 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_MSA(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -1184,7 +1184,7 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
|
|||||||
v8i16 zero = {0};
|
v8i16 zero = {0};
|
||||||
|
|
||||||
for (x = 0; x < width; x += 4) {
|
for (x = 0; x < width; x += 4) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
||||||
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
|
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
|
||||||
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
|
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
|
||||||
@ -1206,13 +1206,13 @@ void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
|
|||||||
vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
|
vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
|
||||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||||
ST_UB(dst0, dst_argb);
|
ST_UB(dst0, dst_argb);
|
||||||
src_argb0 += 16;
|
src_argb += 16;
|
||||||
src_argb1 += 16;
|
src_argb1 += 16;
|
||||||
dst_argb += 16;
|
dst_argb += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBAddRow_MSA(const uint8_t* src_argb0,
|
void ARGBAddRow_MSA(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -1220,20 +1220,20 @@ void ARGBAddRow_MSA(const uint8_t* src_argb0,
|
|||||||
v16u8 src0, src1, src2, src3, dst0, dst1;
|
v16u8 src0, src1, src2, src3, dst0, dst1;
|
||||||
|
|
||||||
for (x = 0; x < width; x += 8) {
|
for (x = 0; x < width; x += 8) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
|
||||||
dst0 = __msa_adds_u_b(src0, src2);
|
dst0 = __msa_adds_u_b(src0, src2);
|
||||||
dst1 = __msa_adds_u_b(src1, src3);
|
dst1 = __msa_adds_u_b(src1, src3);
|
||||||
ST_UB2(dst0, dst1, dst_argb, 16);
|
ST_UB2(dst0, dst1, dst_argb, 16);
|
||||||
src_argb0 += 32;
|
src_argb += 32;
|
||||||
src_argb1 += 32;
|
src_argb1 += 32;
|
||||||
dst_argb += 32;
|
dst_argb += 32;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
|
void ARGBSubtractRow_MSA(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -1241,14 +1241,14 @@ void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
|
|||||||
v16u8 src0, src1, src2, src3, dst0, dst1;
|
v16u8 src0, src1, src2, src3, dst0, dst1;
|
||||||
|
|
||||||
for (x = 0; x < width; x += 8) {
|
for (x = 0; x < width; x += 8) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
|
||||||
dst0 = __msa_subs_u_b(src0, src2);
|
dst0 = __msa_subs_u_b(src0, src2);
|
||||||
dst1 = __msa_subs_u_b(src1, src3);
|
dst1 = __msa_subs_u_b(src1, src3);
|
||||||
ST_UB2(dst0, dst1, dst_argb, 16);
|
ST_UB2(dst0, dst1, dst_argb, 16);
|
||||||
src_argb0 += 32;
|
src_argb += 32;
|
||||||
src_argb1 += 32;
|
src_argb1 += 32;
|
||||||
dst_argb += 32;
|
dst_argb += 32;
|
||||||
}
|
}
|
||||||
@ -1794,7 +1794,7 @@ void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
|
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
|
||||||
v8u16 vec0, vec1, vec2, vec3;
|
v8u16 vec0, vec1, vec2, vec3;
|
||||||
@ -1809,9 +1809,9 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v16i8 zero = {0};
|
v16i8 zero = {0};
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
|
||||||
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
|
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
|
||||||
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
|
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
|
||||||
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
|
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
|
||||||
@ -1830,12 +1830,12 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
|
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
|
||||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 48;
|
src_argb += 48;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
|
v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
|
||||||
v8u16 vec0, vec1, vec2, vec3;
|
v8u16 vec0, vec1, vec2, vec3;
|
||||||
@ -1850,9 +1850,9 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v16i8 zero = {0};
|
v16i8 zero = {0};
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
|
||||||
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
|
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
|
||||||
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
|
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
|
||||||
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
|
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
|
||||||
@ -1871,7 +1871,7 @@ void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
|
vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
|
||||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 48;
|
src_argb += 48;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2037,14 +2037,14 @@ void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
|
void RGB24ToUVRow_MSA(const uint8_t* src_rgb,
|
||||||
int src_stride_rgb,
|
int src_stride_rgb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* s = src_rgb0;
|
const uint8_t* s = src_rgb;
|
||||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
const uint8_t* t = src_rgb + src_stride_rgb;
|
||||||
int64_t res0, res1;
|
int64_t res0, res1;
|
||||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||||
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
|
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
|
||||||
@ -2147,14 +2147,14 @@ void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RAWToUVRow_MSA(const uint8_t* src_rgb0,
|
void RAWToUVRow_MSA(const uint8_t* src_rgb,
|
||||||
int src_stride_rgb,
|
int src_stride_rgb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* s = src_rgb0;
|
const uint8_t* s = src_rgb;
|
||||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
const uint8_t* t = src_rgb + src_stride_rgb;
|
||||||
int64_t res0, res1;
|
int64_t res0, res1;
|
||||||
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
|
v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
|
||||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||||
@ -2446,7 +2446,7 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void ARGBToYJRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, src3, dst0;
|
v16u8 src0, src1, src2, src3, dst0;
|
||||||
v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D);
|
v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D);
|
||||||
@ -2454,19 +2454,19 @@ void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80);
|
v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80);
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
|
||||||
ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8,
|
ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8,
|
||||||
dst0);
|
dst0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 64;
|
src_argb += 64;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, src3, dst0;
|
v16u8 src0, src1, src2, src3, dst0;
|
||||||
v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200);
|
v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200);
|
||||||
@ -2474,19 +2474,19 @@ void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
|
||||||
ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8,
|
ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8,
|
||||||
dst0);
|
dst0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 64;
|
src_argb += 64;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, src3, dst0;
|
v16u8 src0, src1, src2, src3, dst0;
|
||||||
v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142);
|
v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142);
|
||||||
@ -2494,19 +2494,19 @@ void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
|
||||||
ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8,
|
ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8,
|
||||||
dst0);
|
dst0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 64;
|
src_argb += 64;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2, src3, dst0;
|
v16u8 src0, src1, src2, src3, dst0;
|
||||||
v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900);
|
v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900);
|
||||||
@ -2514,26 +2514,26 @@ void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
|||||||
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
||||||
|
|
||||||
for (x = 0; x < width; x += 16) {
|
for (x = 0; x < width; x += 16) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb, 32);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb, 48);
|
||||||
ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8,
|
ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8,
|
||||||
dst0);
|
dst0);
|
||||||
ST_UB(dst0, dst_y);
|
ST_UB(dst0, dst_y);
|
||||||
src_argb0 += 64;
|
src_argb += 64;
|
||||||
dst_y += 16;
|
dst_y += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
|
void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
|
||||||
int src_stride_rgb,
|
int src_stride_rgb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* s = src_rgb0;
|
const uint8_t* s = src_rgb;
|
||||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
const uint8_t* t = src_rgb + src_stride_rgb;
|
||||||
v8u16 src0, src1, src2, src3, src4, src5, src6, src7;
|
v8u16 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||||
v8u16 vec0, vec1, vec2, vec3;
|
v8u16 vec0, vec1, vec2, vec3;
|
||||||
v8u16 dst0, dst1, dst2, dst3;
|
v8u16 dst0, dst1, dst2, dst3;
|
||||||
@ -2658,14 +2658,14 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
|
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
|
||||||
int src_stride_rgb,
|
int src_stride_rgb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* s = src_rgb0;
|
const uint8_t* s = src_rgb;
|
||||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
const uint8_t* t = src_rgb + src_stride_rgb;
|
||||||
const uint8_t unused = 0xf;
|
const uint8_t unused = 0xf;
|
||||||
v8u16 src0, src1, src2, src3;
|
v8u16 src0, src1, src2, src3;
|
||||||
v16u8 dst0, dst1;
|
v16u8 dst0, dst1;
|
||||||
@ -2693,14 +2693,14 @@ void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
|
void ABGRToUVRow_MSA(const uint8_t* src_rgb,
|
||||||
int src_stride_rgb,
|
int src_stride_rgb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* s = src_rgb0;
|
const uint8_t* s = src_rgb;
|
||||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
const uint8_t* t = src_rgb + src_stride_rgb;
|
||||||
const uint8_t unused = 0xf;
|
const uint8_t unused = 0xf;
|
||||||
v8u16 src0, src1, src2, src3;
|
v8u16 src0, src1, src2, src3;
|
||||||
v16u8 dst0, dst1;
|
v16u8 dst0, dst1;
|
||||||
@ -2728,14 +2728,14 @@ void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
|
void RGBAToUVRow_MSA(const uint8_t* src_rgb,
|
||||||
int src_stride_rgb,
|
int src_stride_rgb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
const uint8_t* s = src_rgb0;
|
const uint8_t* s = src_rgb;
|
||||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
const uint8_t* t = src_rgb + src_stride_rgb;
|
||||||
const uint8_t unused = 0xf;
|
const uint8_t unused = 0xf;
|
||||||
v8u16 src0, src1, src2, src3;
|
v8u16 src0, src1, src2, src3;
|
||||||
v16u8 dst0, dst1;
|
v16u8 dst0, dst1;
|
||||||
@ -3109,7 +3109,7 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBBlendRow_MSA(const uint8_t* src_argb0,
|
void ARGBBlendRow_MSA(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -3123,8 +3123,8 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
|
|||||||
v16i8 zero = {0};
|
v16i8 zero = {0};
|
||||||
|
|
||||||
for (x = 0; x < width; x += 8) {
|
for (x = 0; x < width; x += 8) {
|
||||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
src0 = (v16u8)__msa_ld_b((void*)src_argb, 0);
|
||||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16);
|
||||||
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0);
|
||||||
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
|
src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16);
|
||||||
vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
|
vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
|
||||||
@ -3168,7 +3168,7 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
|
|||||||
dst0 = __msa_bmnz_v(dst0, const_255, mask);
|
dst0 = __msa_bmnz_v(dst0, const_255, mask);
|
||||||
dst1 = __msa_bmnz_v(dst1, const_255, mask);
|
dst1 = __msa_bmnz_v(dst1, const_255, mask);
|
||||||
ST_UB2(dst0, dst1, dst_argb, 16);
|
ST_UB2(dst0, dst1, dst_argb, 16);
|
||||||
src_argb0 += 32;
|
src_argb += 32;
|
||||||
src_argb1 += 32;
|
src_argb1 += 32;
|
||||||
dst_argb += 32;
|
dst_argb += 32;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -415,11 +415,11 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"vmov.u8 d23, #255 \n"
|
"vmov.u8 d23, #255 \n"
|
||||||
"1: \n" READNV12 YUVTORGB
|
"1: \n" READNV12 YUVTORGB
|
||||||
"subs %3, %3, #8 \n"
|
"subs %3, %3, #8 \n"
|
||||||
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
|
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_y), // %0
|
: "+r"(src_y), // %0
|
||||||
"+r"(src_uv), // %1
|
"+r"(src_uv), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
@ -438,11 +438,11 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"vmov.u8 d23, #255 \n"
|
"vmov.u8 d23, #255 \n"
|
||||||
"1: \n" READNV21 YUVTORGB
|
"1: \n" READNV21 YUVTORGB
|
||||||
"subs %3, %3, #8 \n"
|
"subs %3, %3, #8 \n"
|
||||||
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
|
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_y), // %0
|
: "+r"(src_y), // %0
|
||||||
"+r"(src_vu), // %1
|
"+r"(src_vu), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
@ -537,11 +537,11 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
|
|||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"vmov.u8 d23, #255 \n"
|
"vmov.u8 d23, #255 \n"
|
||||||
"1: \n" READYUY2 YUVTORGB
|
"1: \n" READYUY2 YUVTORGB
|
||||||
"subs %2, %2, #8 \n"
|
"subs %2, %2, #8 \n"
|
||||||
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
|
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_yuy2), // %0
|
: "+r"(src_yuy2), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -558,11 +558,11 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
|
|||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"vmov.u8 d23, #255 \n"
|
"vmov.u8 d23, #255 \n"
|
||||||
"1: \n" READUYVY YUVTORGB
|
"1: \n" READUYVY YUVTORGB
|
||||||
"subs %2, %2, #8 \n"
|
"subs %2, %2, #8 \n"
|
||||||
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
|
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_uyvy), // %0
|
: "+r"(src_uyvy), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -1680,7 +1680,7 @@ void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
|
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vmov.u8 d24, #29 \n" // B * 0.1140 coefficient
|
"vmov.u8 d24, #29 \n" // B * 0.1140 coefficient
|
||||||
"vmov.u8 d25, #150 \n" // G * 0.5870 coefficient
|
"vmov.u8 d25, #150 \n" // G * 0.5870 coefficient
|
||||||
@ -1694,7 +1694,7 @@ void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"vqrshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit Y
|
"vqrshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit Y
|
||||||
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
|
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_rgba), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
:
|
:
|
||||||
@ -2655,7 +2655,7 @@ void InterpolateRow_NEON(uint8_t* dst_ptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
|
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
|
||||||
void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
void ARGBBlendRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -2706,7 +2706,7 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
"99: \n"
|
"99: \n"
|
||||||
|
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -2944,7 +2944,7 @@ void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -2964,7 +2964,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
|||||||
"vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
|
"vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
|
||||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -2973,7 +2973,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
void ARGBAddRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -2987,7 +2987,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
|||||||
"vqadd.u8 q1, q1, q3 \n" // add R, A
|
"vqadd.u8 q1, q1, q3 \n" // add R, A
|
||||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -2996,7 +2996,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
||||||
void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
|
void ARGBSubtractRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -3010,7 +3010,7 @@ void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
|
|||||||
"vqsub.u8 q1, q1, q3 \n" // subtract R, A
|
"vqsub.u8 q1, q1, q3 \n" // subtract R, A
|
||||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
"vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
|
|||||||
@ -909,7 +909,7 @@ void MergeXR30Row_NEON(const uint16_t* src_r,
|
|||||||
"+r"(src_b), // %2
|
"+r"(src_b), // %2
|
||||||
"+r"(dst_ar30), // %3
|
"+r"(dst_ar30), // %3
|
||||||
"+r"(width) // %4
|
"+r"(width) // %4
|
||||||
: "r"(shift) // %5
|
: "r"(shift) // %5
|
||||||
: "memory", "cc", "v0", "v1", "v2", "v30", "v31");
|
: "memory", "cc", "v0", "v1", "v2", "v30", "v31");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1305,10 +1305,10 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
|
|||||||
"movi v5.8b, #255 \n" // Alpha
|
"movi v5.8b, #255 \n" // Alpha
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
|
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"orr v3.8b, v1.8b, v1.8b \n" // move g
|
"orr v3.8b, v1.8b, v1.8b \n" // move g
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"orr v4.8b, v0.8b, v0.8b \n" // move r
|
"orr v4.8b, v0.8b, v0.8b \n" // move r
|
||||||
"st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
|
"st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_raw), // %0
|
: "+r"(src_raw), // %0
|
||||||
@ -1324,10 +1324,10 @@ void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
|
|||||||
"movi v0.8b, #255 \n" // Alpha
|
"movi v0.8b, #255 \n" // Alpha
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v3.8b,v4.8b,v5.8b}, [%0], #24 \n" // read r g b
|
"ld3 {v3.8b,v4.8b,v5.8b}, [%0], #24 \n" // read r g b
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"orr v2.8b, v4.8b, v4.8b \n" // move g
|
"orr v2.8b, v4.8b, v4.8b \n" // move g
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"orr v1.8b, v5.8b, v5.8b \n" // move r
|
"orr v1.8b, v5.8b, v5.8b \n" // move r
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store a b g r
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store a b g r
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_raw), // %0
|
: "+r"(src_raw), // %0
|
||||||
@ -1377,8 +1377,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
|
"ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n" RGB565TOARGB
|
||||||
RGB565TOARGB
|
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_rgb565), // %0
|
: "+r"(src_rgb565), // %0
|
||||||
@ -1467,8 +1466,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
|
"ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n" ARGB4444TOARGB
|
||||||
ARGB4444TOARGB
|
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb4444), // %0
|
: "+r"(src_argb4444), // %0
|
||||||
@ -1485,7 +1483,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
|
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of
|
"st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of
|
||||||
// RGB24
|
// RGB24
|
||||||
@ -1502,8 +1500,8 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
|
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"orr v4.8b, v2.8b, v2.8b \n" // mov g
|
"orr v4.8b, v2.8b, v2.8b \n" // mov g
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"orr v5.8b, v1.8b, v1.8b \n" // mov b
|
"orr v5.8b, v1.8b, v1.8b \n" // mov b
|
||||||
"st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b
|
"st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b
|
||||||
@ -1676,7 +1674,7 @@ void I422ToYUY2Row_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
|
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
|
||||||
"subs %w4, %w4, #16 \n" // 16 pixels
|
"subs %w4, %w4, #16 \n" // 16 pixels
|
||||||
"orr v2.8b, v1.8b, v1.8b \n"
|
"orr v2.8b, v1.8b, v1.8b \n"
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
|
"ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
|
||||||
@ -1724,8 +1722,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
|
|||||||
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
|
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
|
||||||
// pixels
|
// pixels
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n" ARGBTORGB565
|
||||||
ARGBTORGB565
|
|
||||||
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
|
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
@ -1766,8 +1763,7 @@ void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
|
|||||||
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
|
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
|
||||||
// pixels
|
// pixels
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n" ARGBTOARGB1555
|
||||||
ARGBTOARGB1555
|
|
||||||
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
|
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
@ -1787,8 +1783,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
|
|||||||
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
|
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8
|
||||||
// pixels
|
// pixels
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n" ARGBTOARGB4444
|
||||||
ARGBTOARGB4444
|
|
||||||
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
|
"st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
@ -1956,7 +1951,7 @@ void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v4.8b, #29 \n" // B * 0.1140 coefficient
|
"movi v4.8b, #29 \n" // B * 0.1140 coefficient
|
||||||
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
|
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
|
||||||
@ -1971,7 +1966,7 @@ void RGBAToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"uqrshrn v3.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y
|
"uqrshrn v3.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y
|
||||||
"st1 {v3.8b}, [%1], #8 \n" // store 8 pixels Y.
|
"st1 {v3.8b}, [%1], #8 \n" // store 8 pixels Y.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_rgba), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
:
|
:
|
||||||
@ -2668,8 +2663,8 @@ void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
|||||||
"movi v7.8b, #16 \n" // Add 16 constant
|
"movi v7.8b, #16 \n" // Add 16 constant
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"umull v16.8h, v0.8b, v4.8b \n" // B
|
"umull v16.8h, v0.8b, v4.8b \n" // B
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"umlal v16.8h, v1.8b, v5.8b \n" // G
|
"umlal v16.8h, v1.8b, v5.8b \n" // G
|
||||||
"umlal v16.8h, v2.8b, v6.8b \n" // R
|
"umlal v16.8h, v2.8b, v6.8b \n" // R
|
||||||
@ -2692,8 +2687,8 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
|||||||
"movi v7.8b, #16 \n" // Add 16 constant
|
"movi v7.8b, #16 \n" // Add 16 constant
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"umull v16.8h, v0.8b, v4.8b \n" // B
|
"umull v16.8h, v0.8b, v4.8b \n" // B
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"umlal v16.8h, v1.8b, v5.8b \n" // G
|
"umlal v16.8h, v1.8b, v5.8b \n" // G
|
||||||
"umlal v16.8h, v2.8b, v6.8b \n" // R
|
"umlal v16.8h, v2.8b, v6.8b \n" // R
|
||||||
@ -2715,8 +2710,8 @@ void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
|
|||||||
"movi v6.8b, #77 \n" // R * 0.2990 coefficient
|
"movi v6.8b, #77 \n" // R * 0.2990 coefficient
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"umull v0.8h, v0.8b, v4.8b \n" // B
|
"umull v0.8h, v0.8b, v4.8b \n" // B
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"umlal v0.8h, v1.8b, v5.8b \n" // G
|
"umlal v0.8h, v1.8b, v5.8b \n" // G
|
||||||
"umlal v0.8h, v2.8b, v6.8b \n" // R
|
"umlal v0.8h, v2.8b, v6.8b \n" // R
|
||||||
@ -2737,8 +2732,8 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
|
|||||||
"movi v4.8b, #77 \n" // R * 0.2990 coefficient
|
"movi v4.8b, #77 \n" // R * 0.2990 coefficient
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"umull v0.8h, v0.8b, v4.8b \n" // B
|
"umull v0.8h, v0.8b, v4.8b \n" // B
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"umlal v0.8h, v1.8b, v5.8b \n" // G
|
"umlal v0.8h, v1.8b, v5.8b \n" // G
|
||||||
"umlal v0.8h, v2.8b, v6.8b \n" // R
|
"umlal v0.8h, v2.8b, v6.8b \n" // R
|
||||||
@ -2818,7 +2813,7 @@ void InterpolateRow_NEON(uint8_t* dst_ptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
|
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
|
||||||
void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
void ARGBBlendRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -2880,7 +2875,7 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
"99: \n"
|
"99: \n"
|
||||||
|
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -2900,11 +2895,11 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
|
|||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||||
"umull v4.8h, v0.8b, v3.8b \n" // b * a
|
"umull v4.8h, v0.8b, v3.8b \n" // b * a
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"umull v5.8h, v1.8b, v3.8b \n" // g * a
|
"umull v5.8h, v1.8b, v3.8b \n" // g * a
|
||||||
"umull v6.8h, v2.8b, v3.8b \n" // r * a
|
"umull v6.8h, v2.8b, v3.8b \n" // r * a
|
||||||
"uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
|
"uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
|
||||||
"uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
|
"uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
|
||||||
"uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
|
"uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
@ -2930,8 +2925,8 @@ void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
|
|||||||
// 8 pixel loop.
|
// 8 pixel loop.
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB.
|
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB.
|
||||||
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
||||||
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
|
"uxtl v0.8h, v0.8b \n" // b (0 .. 255)
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"uxtl v1.8h, v1.8b \n"
|
"uxtl v1.8h, v1.8b \n"
|
||||||
"uxtl v2.8h, v2.8b \n"
|
"uxtl v2.8h, v2.8b \n"
|
||||||
@ -3040,8 +3035,8 @@ void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
|
|||||||
"movi v30.8b, #50 \n" // BR coefficient
|
"movi v30.8b, #50 \n" // BR coefficient
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
|
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
|
||||||
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
"subs %w1, %w1, #8 \n" // 8 processed per loop.
|
||||||
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
|
"umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"umlal v4.8h, v1.8b, v21.8b \n" // G
|
"umlal v4.8h, v1.8b, v21.8b \n" // G
|
||||||
"umlal v4.8h, v2.8b, v22.8b \n" // R
|
"umlal v4.8h, v2.8b, v22.8b \n" // R
|
||||||
@ -3127,7 +3122,7 @@ void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
|
// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
|
||||||
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
void ARGBMultiplyRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -3149,7 +3144,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
|||||||
"rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
|
"rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -3158,7 +3153,7 @@ void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
void ARGBAddRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -3176,7 +3171,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
|||||||
"uqadd v3.8b, v3.8b, v7.8b \n"
|
"uqadd v3.8b, v3.8b, v7.8b \n"
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -3185,7 +3180,7 @@ void ARGBAddRow_NEON(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
||||||
void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
|
void ARGBSubtractRow_NEON(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
@ -3203,7 +3198,7 @@ void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
|
|||||||
"uqsub v3.8b, v3.8b, v7.8b \n"
|
"uqsub v3.8b, v3.8b, v7.8b \n"
|
||||||
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
|
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb0), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb1), // %1
|
"+r"(src_argb1), // %1
|
||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
@ -3703,9 +3698,9 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
|
|||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
|
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
|
||||||
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
|
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
|
||||||
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
|
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
"zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values
|
"zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values
|
||||||
"prfm pldl1keep, [%1, 448] \n"
|
"prfm pldl1keep, [%1, 448] \n"
|
||||||
"subs %w3, %w3, #16 \n" // 16 pixels per loop
|
"subs %w3, %w3, #16 \n" // 16 pixels per loop
|
||||||
"st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n" // store 16 YUV pixels
|
"st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n" // store 16 YUV pixels
|
||||||
|
|||||||
@ -1427,7 +1427,7 @@ __declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1499,7 +1499,7 @@ __declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1573,7 +1573,7 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||||
__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1641,7 +1641,7 @@ __declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
|
|||||||
#endif // HAS_ARGBTOUVROW_AVX2
|
#endif // HAS_ARGBTOUVROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOUVJROW_AVX2
|
#ifdef HAS_ARGBTOUVJROW_AVX2
|
||||||
__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1709,7 +1709,7 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOUVJROW_AVX2
|
#endif // HAS_ARGBTOUVJROW_AVX2
|
||||||
|
|
||||||
__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
@ -1767,7 +1767,7 @@ __declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1839,7 +1839,7 @@ __declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1911,7 +1911,7 @@ __declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -4347,13 +4347,13 @@ static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
|
|||||||
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
|
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
|
||||||
|
|
||||||
// Blend 8 pixels at a time.
|
// Blend 8 pixels at a time.
|
||||||
__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
@ -4442,7 +4442,7 @@ __declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src_argb0
|
mov eax, [esp + 4] // src_argb
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
|
pcmpeqb xmm3, xmm3 // generate mask 0xff000000
|
||||||
@ -4487,7 +4487,7 @@ __declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src_argb0
|
mov eax, [esp + 4] // src_argb
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
sub edx, eax
|
sub edx, eax
|
||||||
@ -4581,7 +4581,7 @@ __declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src_argb0
|
mov eax, [esp + 4] // src_argb
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
sub edx, eax
|
sub edx, eax
|
||||||
@ -4937,20 +4937,20 @@ __declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBMULTIPLYROW_SSE2
|
#ifdef HAS_ARGBMULTIPLYROW_SSE2
|
||||||
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
|
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
|
||||||
__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
pxor xmm5, xmm5 // constant 0
|
pxor xmm5, xmm5 // constant 0
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
movdqu xmm0, [eax] // read 4 pixels from src_argb0
|
movdqu xmm0, [eax] // read 4 pixels from src_argb
|
||||||
movdqu xmm2, [esi] // read 4 pixels from src_argb1
|
movdqu xmm2, [esi] // read 4 pixels from src_argb1
|
||||||
movdqu xmm1, xmm0
|
movdqu xmm1, xmm0
|
||||||
movdqu xmm3, xmm2
|
movdqu xmm3, xmm2
|
||||||
@ -4958,8 +4958,8 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
|||||||
punpckhbw xmm1, xmm1 // next 2
|
punpckhbw xmm1, xmm1 // next 2
|
||||||
punpcklbw xmm2, xmm5 // first 2
|
punpcklbw xmm2, xmm5 // first 2
|
||||||
punpckhbw xmm3, xmm5 // next 2
|
punpckhbw xmm3, xmm5 // next 2
|
||||||
pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2
|
pmulhuw xmm0, xmm2 // src_argb * src_argb1 first 2
|
||||||
pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
|
pmulhuw xmm1, xmm3 // src_argb * src_argb1 next 2
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
lea esi, [esi + 16]
|
lea esi, [esi + 16]
|
||||||
packuswb xmm0, xmm1
|
packuswb xmm0, xmm1
|
||||||
@ -4977,13 +4977,13 @@ __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
|
|||||||
#ifdef HAS_ARGBADDROW_SSE2
|
#ifdef HAS_ARGBADDROW_SSE2
|
||||||
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
|
// Add 2 rows of ARGB pixels together, 4 pixels at a time.
|
||||||
// TODO(fbarchard): Port this to posix, neon and other math functions.
|
// TODO(fbarchard): Port this to posix, neon and other math functions.
|
||||||
__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
@ -4992,11 +4992,11 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
|||||||
jl convertloop49
|
jl convertloop49
|
||||||
|
|
||||||
convertloop4:
|
convertloop4:
|
||||||
movdqu xmm0, [eax] // read 4 pixels from src_argb0
|
movdqu xmm0, [eax] // read 4 pixels from src_argb
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
movdqu xmm1, [esi] // read 4 pixels from src_argb1
|
movdqu xmm1, [esi] // read 4 pixels from src_argb1
|
||||||
lea esi, [esi + 16]
|
lea esi, [esi + 16]
|
||||||
paddusb xmm0, xmm1 // src_argb0 + src_argb1
|
paddusb xmm0, xmm1 // src_argb + src_argb1
|
||||||
movdqu [edx], xmm0
|
movdqu [edx], xmm0
|
||||||
lea edx, [edx + 16]
|
lea edx, [edx + 16]
|
||||||
sub ecx, 4
|
sub ecx, 4
|
||||||
@ -5007,11 +5007,11 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
|||||||
jl convertloop19
|
jl convertloop19
|
||||||
|
|
||||||
convertloop1:
|
convertloop1:
|
||||||
movd xmm0, [eax] // read 1 pixels from src_argb0
|
movd xmm0, [eax] // read 1 pixels from src_argb
|
||||||
lea eax, [eax + 4]
|
lea eax, [eax + 4]
|
||||||
movd xmm1, [esi] // read 1 pixels from src_argb1
|
movd xmm1, [esi] // read 1 pixels from src_argb1
|
||||||
lea esi, [esi + 4]
|
lea esi, [esi + 4]
|
||||||
paddusb xmm0, xmm1 // src_argb0 + src_argb1
|
paddusb xmm0, xmm1 // src_argb + src_argb1
|
||||||
movd [edx], xmm0
|
movd [edx], xmm0
|
||||||
lea edx, [edx + 4]
|
lea edx, [edx + 4]
|
||||||
sub ecx, 1
|
sub ecx, 1
|
||||||
@ -5026,23 +5026,23 @@ __declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBSUBTRACTROW_SSE2
|
#ifdef HAS_ARGBSUBTRACTROW_SSE2
|
||||||
// Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
|
// Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
|
||||||
__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
movdqu xmm0, [eax] // read 4 pixels from src_argb0
|
movdqu xmm0, [eax] // read 4 pixels from src_argb
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
movdqu xmm1, [esi] // read 4 pixels from src_argb1
|
movdqu xmm1, [esi] // read 4 pixels from src_argb1
|
||||||
lea esi, [esi + 16]
|
lea esi, [esi + 16]
|
||||||
psubusb xmm0, xmm1 // src_argb0 - src_argb1
|
psubusb xmm0, xmm1 // src_argb - src_argb1
|
||||||
movdqu [edx], xmm0
|
movdqu [edx], xmm0
|
||||||
lea edx, [edx + 16]
|
lea edx, [edx + 16]
|
||||||
sub ecx, 4
|
sub ecx, 4
|
||||||
@ -5056,20 +5056,20 @@ __declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBMULTIPLYROW_AVX2
|
#ifdef HAS_ARGBMULTIPLYROW_AVX2
|
||||||
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
vpxor ymm5, ymm5, ymm5 // constant 0
|
vpxor ymm5, ymm5, ymm5 // constant 0
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
vmovdqu ymm1, [eax] // read 8 pixels from src_argb0
|
vmovdqu ymm1, [eax] // read 8 pixels from src_argb
|
||||||
lea eax, [eax + 32]
|
lea eax, [eax + 32]
|
||||||
vmovdqu ymm3, [esi] // read 8 pixels from src_argb1
|
vmovdqu ymm3, [esi] // read 8 pixels from src_argb1
|
||||||
lea esi, [esi + 32]
|
lea esi, [esi + 32]
|
||||||
@ -5077,8 +5077,8 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
|||||||
vpunpckhbw ymm1, ymm1, ymm1 // high 4
|
vpunpckhbw ymm1, ymm1, ymm1 // high 4
|
||||||
vpunpcklbw ymm2, ymm3, ymm5 // low 4
|
vpunpcklbw ymm2, ymm3, ymm5 // low 4
|
||||||
vpunpckhbw ymm3, ymm3, ymm5 // high 4
|
vpunpckhbw ymm3, ymm3, ymm5 // high 4
|
||||||
vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4
|
vpmulhuw ymm0, ymm0, ymm2 // src_argb * src_argb1 low 4
|
||||||
vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4
|
vpmulhuw ymm1, ymm1, ymm3 // src_argb * src_argb1 high 4
|
||||||
vpackuswb ymm0, ymm0, ymm1
|
vpackuswb ymm0, ymm0, ymm1
|
||||||
vmovdqu [edx], ymm0
|
vmovdqu [edx], ymm0
|
||||||
lea edx, [edx + 32]
|
lea edx, [edx + 32]
|
||||||
@ -5094,19 +5094,19 @@ __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBADDROW_AVX2
|
#ifdef HAS_ARGBADDROW_AVX2
|
||||||
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
|
vmovdqu ymm0, [eax] // read 8 pixels from src_argb
|
||||||
lea eax, [eax + 32]
|
lea eax, [eax + 32]
|
||||||
vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1
|
vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1
|
||||||
lea esi, [esi + 32]
|
lea esi, [esi + 32]
|
||||||
@ -5124,21 +5124,21 @@ __declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBSUBTRACTROW_AVX2
|
#ifdef HAS_ARGBSUBTRACTROW_AVX2
|
||||||
// Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
|
// Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||||
__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
|
__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
mov eax, [esp + 4 + 4] // src_argb0
|
mov eax, [esp + 4 + 4] // src_argb
|
||||||
mov esi, [esp + 4 + 8] // src_argb1
|
mov esi, [esp + 4 + 8] // src_argb1
|
||||||
mov edx, [esp + 4 + 12] // dst_argb
|
mov edx, [esp + 4 + 12] // dst_argb
|
||||||
mov ecx, [esp + 4 + 16] // width
|
mov ecx, [esp + 4 + 16] // width
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
|
vmovdqu ymm0, [eax] // read 8 pixels from src_argb
|
||||||
lea eax, [eax + 32]
|
lea eax, [eax + 32]
|
||||||
vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1
|
vpsubusb ymm0, ymm0, [esi] // src_argb - src_argb1
|
||||||
lea esi, [esi + 32]
|
lea esi, [esi + 32]
|
||||||
vmovdqu [edx], ymm0
|
vmovdqu [edx], ymm0
|
||||||
lea edx, [edx + 32]
|
lea edx, [edx + 32]
|
||||||
|
|||||||
@ -1446,7 +1446,8 @@ void ScalePlaneUp2_Bilinear(int src_width,
|
|||||||
for (x = 0; x < src_height - 1; ++x) {
|
for (x = 0; x < src_height - 1; ++x) {
|
||||||
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
|
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
|
||||||
src_ptr += src_stride;
|
src_ptr += src_stride;
|
||||||
// TODO: Test performance of writing one row of destination at a time.
|
// TODO(fbarchard): Test performance of writing one row of destination at a
|
||||||
|
// time.
|
||||||
dst_ptr += 2 * dst_stride;
|
dst_ptr += 2 * dst_stride;
|
||||||
}
|
}
|
||||||
if (!(dst_height & 1)) {
|
if (!(dst_height & 1)) {
|
||||||
|
|||||||
@ -746,7 +746,8 @@ void ScaleUVBilinearUp2(int src_width,
|
|||||||
for (x = 0; x < src_height - 1; ++x) {
|
for (x = 0; x < src_height - 1; ++x) {
|
||||||
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
|
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
|
||||||
src_ptr += src_stride;
|
src_ptr += src_stride;
|
||||||
// TODO: Test performance of writing one row of destination at a time.
|
// TODO(fbarchard): Test performance of writing one row of destination at a
|
||||||
|
// time.
|
||||||
dst_ptr += 2 * dst_stride;
|
dst_ptr += 2 * dst_stride;
|
||||||
}
|
}
|
||||||
if (!(dst_height & 1)) {
|
if (!(dst_height & 1)) {
|
||||||
@ -851,7 +852,8 @@ void ScaleUVBilinearUp2_16(int src_width,
|
|||||||
for (x = 0; x < src_height - 1; ++x) {
|
for (x = 0; x < src_height - 1; ++x) {
|
||||||
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
|
Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
|
||||||
src_ptr += src_stride;
|
src_ptr += src_stride;
|
||||||
// TODO: Test performance of writing one row of destination at a time.
|
// TODO(fbarchard): Test performance of writing one row of destination at a
|
||||||
|
// time.
|
||||||
dst_ptr += 2 * dst_stride;
|
dst_ptr += 2 * dst_stride;
|
||||||
}
|
}
|
||||||
if (!(dst_height & 1)) {
|
if (!(dst_height & 1)) {
|
||||||
|
|||||||
@ -2404,8 +2404,7 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
|
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 4);
|
align_buffer_page_end(src_pixels, kPixels * 4);
|
||||||
align_buffer_page_end(dst_pixels_opt, kPixels);
|
align_buffer_page_end(dst_pixels_opt, kPixels);
|
||||||
align_buffer_page_end(dst_pixels_c, kPixels);
|
align_buffer_page_end(dst_pixels_c, kPixels);
|
||||||
@ -2433,8 +2432,7 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
|
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(orig_pixels, kPixels);
|
align_buffer_page_end(orig_pixels, kPixels);
|
||||||
align_buffer_page_end(dst_pixels_opt, kPixels * 4);
|
align_buffer_page_end(dst_pixels_opt, kPixels * 4);
|
||||||
align_buffer_page_end(dst_pixels_c, kPixels * 4);
|
align_buffer_page_end(dst_pixels_c, kPixels * 4);
|
||||||
@ -2567,35 +2565,25 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
align_buffer_page_end(src_pixels_u, kPixels);
|
||||||
align_buffer_page_end(src_pixels, kPixels * 2);
|
align_buffer_page_end(src_pixels_v, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_u, kPixels);
|
|
||||||
align_buffer_page_end(tmp_pixels_v, kPixels);
|
|
||||||
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
|
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_c, kPixels * 2);
|
||||||
|
|
||||||
MemRandomize(src_pixels, kPixels * 2);
|
MemRandomize(src_pixels_u, kPixels);
|
||||||
MemRandomize(tmp_pixels_u, kPixels);
|
MemRandomize(src_pixels_v, kPixels);
|
||||||
MemRandomize(tmp_pixels_v, kPixels);
|
|
||||||
MemRandomize(dst_pixels_opt, kPixels * 2);
|
MemRandomize(dst_pixels_opt, kPixels * 2);
|
||||||
MemRandomize(dst_pixels_c, kPixels * 2);
|
MemRandomize(dst_pixels_c, kPixels * 2);
|
||||||
|
|
||||||
MaskCpuFlags(disable_cpu_flags_);
|
MaskCpuFlags(disable_cpu_flags_);
|
||||||
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
|
MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
|
||||||
tmp_pixels_v, benchmark_width_, benchmark_width_,
|
|
||||||
benchmark_height_);
|
|
||||||
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
|
|
||||||
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
|
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
|
||||||
benchmark_height_);
|
benchmark_height_);
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info_);
|
||||||
|
|
||||||
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
|
|
||||||
tmp_pixels_v, benchmark_width_, benchmark_width_,
|
|
||||||
benchmark_height_);
|
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
|
MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
|
||||||
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
|
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
|
||||||
benchmark_height_);
|
benchmark_height_);
|
||||||
}
|
}
|
||||||
@ -2604,119 +2592,88 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
|
|||||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_aligned_buffer_page_end(src_pixels);
|
free_aligned_buffer_page_end(src_pixels_u);
|
||||||
free_aligned_buffer_page_end(tmp_pixels_u);
|
free_aligned_buffer_page_end(src_pixels_v);
|
||||||
free_aligned_buffer_page_end(tmp_pixels_v);
|
|
||||||
free_aligned_buffer_page_end(dst_pixels_opt);
|
free_aligned_buffer_page_end(dst_pixels_opt);
|
||||||
free_aligned_buffer_page_end(dst_pixels_c);
|
free_aligned_buffer_page_end(dst_pixels_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 16 bit channel split and merge
|
// 16 bit channel merge
|
||||||
TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
|
TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
align_buffer_page_end(src_pixels_u, kPixels * 2);
|
||||||
align_buffer_page_end(src_pixels, kPixels * 2 * 2);
|
align_buffer_page_end(src_pixels_v, kPixels * 2);
|
||||||
align_buffer_page_end(tmp_pixels_u_c, kPixels * 2);
|
|
||||||
align_buffer_page_end(tmp_pixels_v_c, kPixels * 2);
|
|
||||||
align_buffer_page_end(tmp_pixels_u_opt, kPixels * 2);
|
|
||||||
align_buffer_page_end(tmp_pixels_v_opt, kPixels * 2);
|
|
||||||
align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
|
align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
|
||||||
align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
|
align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
|
||||||
MemRandomize(src_pixels, kPixels * 2 * 2);
|
MemRandomize(src_pixels_u, kPixels * 2);
|
||||||
MemRandomize(tmp_pixels_u_c, kPixels * 2);
|
MemRandomize(src_pixels_v, kPixels * 2);
|
||||||
MemRandomize(tmp_pixels_v_c, kPixels * 2);
|
|
||||||
MemRandomize(tmp_pixels_u_opt, kPixels * 2);
|
|
||||||
MemRandomize(tmp_pixels_v_opt, kPixels * 2);
|
|
||||||
MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
|
MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
|
||||||
MemRandomize(dst_pixels_c, kPixels * 2 * 2);
|
MemRandomize(dst_pixels_c, kPixels * 2 * 2);
|
||||||
|
|
||||||
MaskCpuFlags(disable_cpu_flags_);
|
MaskCpuFlags(disable_cpu_flags_);
|
||||||
SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
|
MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
|
||||||
(uint16_t*)tmp_pixels_u_c, benchmark_width_,
|
(const uint16_t*)src_pixels_v, benchmark_width_,
|
||||||
(uint16_t*)tmp_pixels_v_c, benchmark_width_, benchmark_width_,
|
|
||||||
benchmark_height_, 12);
|
|
||||||
MergeUVPlane_16((const uint16_t*)tmp_pixels_u_c, benchmark_width_,
|
|
||||||
(const uint16_t*)tmp_pixels_v_c, benchmark_width_,
|
|
||||||
(uint16_t*)dst_pixels_c, benchmark_width_ * 2,
|
(uint16_t*)dst_pixels_c, benchmark_width_ * 2,
|
||||||
benchmark_width_, benchmark_height_, 12);
|
benchmark_width_, benchmark_height_, 12);
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info_);
|
||||||
|
|
||||||
SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
|
|
||||||
(uint16_t*)tmp_pixels_u_opt, benchmark_width_,
|
|
||||||
(uint16_t*)tmp_pixels_v_opt, benchmark_width_,
|
|
||||||
benchmark_width_, benchmark_height_, 12);
|
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
MergeUVPlane_16((const uint16_t*)tmp_pixels_u_opt, benchmark_width_,
|
MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
|
||||||
(const uint16_t*)tmp_pixels_v_opt, benchmark_width_,
|
(const uint16_t*)src_pixels_v, benchmark_width_,
|
||||||
(uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
|
(uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
|
||||||
benchmark_width_, benchmark_height_, 12);
|
benchmark_width_, benchmark_height_, 12);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < kPixels * 2; ++i) {
|
|
||||||
EXPECT_EQ(tmp_pixels_u_c[i], tmp_pixels_u_opt[i]);
|
|
||||||
EXPECT_EQ(tmp_pixels_v_c[i], tmp_pixels_v_opt[i]);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < kPixels * 2 * 2; ++i) {
|
for (int i = 0; i < kPixels * 2 * 2; ++i) {
|
||||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
||||||
}
|
}
|
||||||
free_aligned_buffer_page_end(src_pixels);
|
free_aligned_buffer_page_end(src_pixels_u);
|
||||||
free_aligned_buffer_page_end(tmp_pixels_u_c);
|
free_aligned_buffer_page_end(src_pixels_v);
|
||||||
free_aligned_buffer_page_end(tmp_pixels_v_c);
|
|
||||||
free_aligned_buffer_page_end(tmp_pixels_u_opt);
|
|
||||||
free_aligned_buffer_page_end(tmp_pixels_v_opt);
|
|
||||||
free_aligned_buffer_page_end(dst_pixels_opt);
|
free_aligned_buffer_page_end(dst_pixels_opt);
|
||||||
free_aligned_buffer_page_end(dst_pixels_c);
|
free_aligned_buffer_page_end(dst_pixels_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 2);
|
align_buffer_page_end(src_pixels, kPixels * 2);
|
||||||
align_buffer_page_end(tmp_pixels_u, kPixels);
|
align_buffer_page_end(dst_pixels_u_c, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_v, kPixels);
|
align_buffer_page_end(dst_pixels_v_c, kPixels);
|
||||||
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
|
align_buffer_page_end(dst_pixels_u_opt, kPixels);
|
||||||
align_buffer_page_end(dst_pixels_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_v_opt, kPixels);
|
||||||
|
|
||||||
MemRandomize(src_pixels, kPixels * 2);
|
MemRandomize(src_pixels, kPixels * 2);
|
||||||
MemRandomize(tmp_pixels_u, kPixels);
|
MemRandomize(dst_pixels_u_c, kPixels);
|
||||||
MemRandomize(tmp_pixels_v, kPixels);
|
MemRandomize(dst_pixels_v_c, kPixels);
|
||||||
MemRandomize(dst_pixels_opt, kPixels * 2);
|
MemRandomize(dst_pixels_u_opt, kPixels);
|
||||||
MemRandomize(dst_pixels_c, kPixels * 2);
|
MemRandomize(dst_pixels_v_opt, kPixels);
|
||||||
|
|
||||||
MaskCpuFlags(disable_cpu_flags_);
|
MaskCpuFlags(disable_cpu_flags_);
|
||||||
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
|
SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_c,
|
||||||
tmp_pixels_v, benchmark_width_, benchmark_width_,
|
benchmark_width_, dst_pixels_v_c, benchmark_width_,
|
||||||
benchmark_height_);
|
benchmark_width_, benchmark_height_);
|
||||||
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
|
|
||||||
dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
|
|
||||||
benchmark_height_);
|
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info_);
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u,
|
SplitUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_u_opt,
|
||||||
benchmark_width_, tmp_pixels_v, benchmark_width_,
|
benchmark_width_, dst_pixels_v_opt, benchmark_width_,
|
||||||
benchmark_width_, benchmark_height_);
|
benchmark_width_, benchmark_height_);
|
||||||
}
|
}
|
||||||
MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
|
|
||||||
dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
|
|
||||||
benchmark_height_);
|
|
||||||
|
|
||||||
for (int i = 0; i < kPixels * 2; ++i) {
|
for (int i = 0; i < kPixels; ++i) {
|
||||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
|
||||||
|
EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_aligned_buffer_page_end(src_pixels);
|
free_aligned_buffer_page_end(src_pixels);
|
||||||
free_aligned_buffer_page_end(tmp_pixels_u);
|
free_aligned_buffer_page_end(dst_pixels_u_c);
|
||||||
free_aligned_buffer_page_end(tmp_pixels_v);
|
free_aligned_buffer_page_end(dst_pixels_v_c);
|
||||||
free_aligned_buffer_page_end(dst_pixels_opt);
|
free_aligned_buffer_page_end(dst_pixels_u_opt);
|
||||||
free_aligned_buffer_page_end(dst_pixels_c);
|
free_aligned_buffer_page_end(dst_pixels_v_opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 16 bit channel split
|
// 16 bit channel split
|
||||||
TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
|
TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 2 * 2);
|
align_buffer_page_end(src_pixels, kPixels * 2 * 2);
|
||||||
align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
|
||||||
@ -2755,7 +2712,7 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
|
|||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
// Pixel count for the full plane (not rounded up)
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
align_buffer_page_end(src_pixels, kPixels * 2);
|
align_buffer_page_end(src_pixels, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
|
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_c, kPixels * 2);
|
||||||
@ -2785,7 +2742,7 @@ TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
|
|||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
// Pixel count for the full plane (not rounded up)
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
align_buffer_page_end(src_pixels, kPixels * 3);
|
align_buffer_page_end(src_pixels, kPixels * 3);
|
||||||
align_buffer_page_end(tmp_pixels_r, kPixels);
|
align_buffer_page_end(tmp_pixels_r, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_g, kPixels);
|
align_buffer_page_end(tmp_pixels_g, kPixels);
|
||||||
@ -2834,7 +2791,7 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
|
|||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
// Pixel count for the full plane (not rounded up)
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
align_buffer_page_end(src_pixels, kPixels * 3);
|
align_buffer_page_end(src_pixels, kPixels * 3);
|
||||||
align_buffer_page_end(tmp_pixels_r, kPixels);
|
align_buffer_page_end(tmp_pixels_r, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_g, kPixels);
|
align_buffer_page_end(tmp_pixels_g, kPixels);
|
||||||
@ -2881,8 +2838,7 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 4);
|
align_buffer_page_end(src_pixels, kPixels * 4);
|
||||||
align_buffer_page_end(tmp_pixels_r, kPixels);
|
align_buffer_page_end(tmp_pixels_r, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_g, kPixels);
|
align_buffer_page_end(tmp_pixels_g, kPixels);
|
||||||
@ -2936,8 +2892,7 @@ TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 4);
|
align_buffer_page_end(src_pixels, kPixels * 4);
|
||||||
align_buffer_page_end(tmp_pixels_r, kPixels);
|
align_buffer_page_end(tmp_pixels_r, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_g, kPixels);
|
align_buffer_page_end(tmp_pixels_g, kPixels);
|
||||||
@ -2991,8 +2946,7 @@ TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 4);
|
align_buffer_page_end(src_pixels, kPixels * 4);
|
||||||
align_buffer_page_end(tmp_pixels_r, kPixels);
|
align_buffer_page_end(tmp_pixels_r, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_g, kPixels);
|
align_buffer_page_end(tmp_pixels_g, kPixels);
|
||||||
@ -3042,8 +2996,7 @@ TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels, kPixels * 4);
|
align_buffer_page_end(src_pixels, kPixels * 4);
|
||||||
align_buffer_page_end(tmp_pixels_r, kPixels);
|
align_buffer_page_end(tmp_pixels_r, kPixels);
|
||||||
align_buffer_page_end(tmp_pixels_g, kPixels);
|
align_buffer_page_end(tmp_pixels_g, kPixels);
|
||||||
@ -3091,30 +3044,29 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
|
|||||||
free_aligned_buffer_page_end(dst_pixels_c);
|
free_aligned_buffer_page_end(dst_pixels_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merge 4 channels
|
||||||
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
|
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
|
||||||
TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
|
TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
|
||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kPixels = (kWidth * benchmark_height_ + 15) & ~15; \
|
const int kPixels = kWidth * benchmark_height_; \
|
||||||
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
|
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
|
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
|
MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
|
memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
|
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
|
||||||
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
|
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
|
||||||
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
|
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
|
||||||
STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \
|
STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \
|
||||||
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
|
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
|
||||||
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
|
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
|
||||||
for (int i = 0; i < kPixels; ++i) { \
|
|
||||||
src_pixels_r[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_g[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_b[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_a[i] = fastrand() & 65535; \
|
|
||||||
} \
|
|
||||||
memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
|
|
||||||
memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
|
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
|
FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
|
||||||
kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \
|
kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \
|
||||||
@ -3136,27 +3088,26 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
|
|||||||
free_aligned_buffer_page_end(dst_memory_opt); \
|
free_aligned_buffer_page_end(dst_memory_opt); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merge 3 channel RGB into 4 channel XRGB with opaque alpha
|
||||||
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
|
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
|
||||||
TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
|
TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
|
||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kPixels = (kWidth * benchmark_height_ + 15) & ~15; \
|
const int kPixels = kWidth * benchmark_height_; \
|
||||||
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
|
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
|
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
|
MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
memset(dst_memory_c, 0, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
|
memset(dst_memory_opt, 0, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
|
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
|
||||||
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
|
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
|
||||||
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
|
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
|
||||||
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
|
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
|
||||||
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
|
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
|
||||||
for (int i = 0; i < kPixels; ++i) { \
|
|
||||||
src_pixels_r[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_g[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_b[i] = fastrand() & 65535; \
|
|
||||||
} \
|
|
||||||
memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
|
|
||||||
memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
|
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
|
FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
|
||||||
kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \
|
kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \
|
||||||
@ -3177,6 +3128,7 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
|
|||||||
free_aligned_buffer_page_end(dst_memory_opt); \
|
free_aligned_buffer_page_end(dst_memory_opt); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(fbarchard): fix bug and change to benchmark_width - 1
|
||||||
#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
|
#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
|
||||||
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ - 4, _Any, +, 0) \
|
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ - 4, _Any, +, 0) \
|
||||||
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
|
TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
|
||||||
@ -3206,16 +3158,14 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
|
|||||||
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
||||||
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
|
align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
|
align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
|
MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
|
||||||
|
MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
|
||||||
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
|
STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
|
||||||
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
|
STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
|
||||||
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
|
STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
|
||||||
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
|
DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
|
||||||
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
|
DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
|
||||||
for (int i = 0; i < kPixels; ++i) { \
|
|
||||||
src_pixels_r[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_g[i] = fastrand() & 65535; \
|
|
||||||
src_pixels_b[i] = fastrand() & 65535; \
|
|
||||||
} \
|
|
||||||
memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
|
memset(dst_pixels_c, 1, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
|
memset(dst_pixels_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
@ -3238,13 +3188,13 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
|
|||||||
free_aligned_buffer_page_end(dst_memory_opt); \
|
free_aligned_buffer_page_end(dst_memory_opt); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(fbarchard): Fix MergeXR30 and change _any to width - 1
|
||||||
#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
|
#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
|
||||||
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ - 4, _Any, +, 0) \
|
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ - 4, _Any, +, 0) \
|
||||||
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
|
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
|
||||||
1) \
|
1) \
|
||||||
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
|
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
|
||||||
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
|
TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
|
||||||
|
|
||||||
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
|
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
|
||||||
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
|
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
|
||||||
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
|
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
|
||||||
@ -3254,6 +3204,7 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
|
|||||||
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
|
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
|
||||||
// Round count up to multiple of 16
|
// Round count up to multiple of 16
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
||||||
|
|
||||||
align_buffer_page_end(src_pixels_u, kPixels * 2);
|
align_buffer_page_end(src_pixels_u, kPixels * 2);
|
||||||
align_buffer_page_end(src_pixels_v, kPixels * 2);
|
align_buffer_page_end(src_pixels_v, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
|
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
|
||||||
@ -3299,6 +3250,7 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
|
|||||||
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
|
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
|
||||||
// Round count up to multiple of 16
|
// Round count up to multiple of 16
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
||||||
|
|
||||||
align_buffer_page_end(src_pixels_y, kPixels * 2);
|
align_buffer_page_end(src_pixels_y, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
|
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
|
||||||
@ -3334,8 +3286,7 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
|
|||||||
#endif // HAS_MULTIPLYROW_16_AVX2
|
#endif // HAS_MULTIPLYROW_16_AVX2
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
|
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels_y, kPixels * 2);
|
align_buffer_page_end(src_pixels_y, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_y_opt, kPixels);
|
align_buffer_page_end(dst_pixels_y_opt, kPixels);
|
||||||
align_buffer_page_end(dst_pixels_y_c, kPixels);
|
align_buffer_page_end(dst_pixels_y_c, kPixels);
|
||||||
@ -3414,8 +3365,7 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
|
|||||||
#endif // ENABLE_ROW_TESTS
|
#endif // ENABLE_ROW_TESTS
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
|
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
|
||||||
// Round count up to multiple of 16
|
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||||
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
|
|
||||||
align_buffer_page_end(src_pixels_y, kPixels);
|
align_buffer_page_end(src_pixels_y, kPixels);
|
||||||
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
|
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
|
||||||
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
|
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user