libyuv:loongson optimize compare/row/scale/rotate files with mmi.

Currently, libyuv supports the MIPS SIMD Architecture (MSA),
but it does not support the MultiMedia Instruction (MMI) extensions (used on platforms such as Loongson 3A).

To improve the performance of libyuv on the Loongson 3A platform,
this change provides MMI-optimized versions of 98 functions.

BUG=libyuv:804

Change-Id: I8947626009efad769b3103a867363ece25d79629
Reviewed-on: https://chromium-review.googlesource.com/1122064
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
lixia zhang 2018-07-04 18:40:48 +08:00 committed by Commit Bot
parent 55f5d91f11
commit 21be9122aa
33 changed files with 9572 additions and 31 deletions

View File

@ -69,6 +69,10 @@ group("libyuv") {
deps += [ ":libyuv_msa" ] deps += [ ":libyuv_msa" ]
} }
if (libyuv_use_mmi) {
deps += [ ":libyuv_mmi" ]
}
if (!is_ios) { if (!is_ios) {
# Make sure that clients of libyuv link with libjpeg. This can't go in # Make sure that clients of libyuv link with libjpeg. This can't go in
# libyuv_internal because in Windows x64 builds that will generate a clang # libyuv_internal because in Windows x64 builds that will generate a clang
@ -229,6 +233,24 @@ if (libyuv_use_msa) {
} }
} }
if (libyuv_use_mmi) {
static_library("libyuv_mmi") {
sources = [
# MMI Source Files
"source/compare_mmi.cc",
"source/rotate_mmi.cc",
"source/row_mmi.cc",
"source/scale_mmi.cc",
]
deps = [
":libyuv_internal",
]
public_configs = [ ":libyuv_config" ]
}
}
if (libyuv_include_tests) { if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") { config("libyuv_unittest_warnings_config") {
if (!is_win) { if (!is_win) {

View File

@ -178,6 +178,15 @@ Running test with C code:
ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest ninja -v -C out/Release libyuv_unittest
### MIPS Linux
mips
gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false is_clang=false use_sysroot=false use_gold=false"
gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false is_clang=false use_sysroot=false use_gold=false"
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
## Building the Library with make ## Building the Library with make
### Linux ### Linux

View File

@ -84,6 +84,11 @@ extern "C" {
#define HAS_SUMSQUAREERROR_MSA #define HAS_SUMSQUAREERROR_MSA
#endif #endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_HAMMINGDISTANCE_MMI
#define HAS_SUMSQUAREERROR_MMI
#endif
uint32_t HammingDistance_C(const uint8_t* src_a, uint32_t HammingDistance_C(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
@ -102,7 +107,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a,
uint32_t HammingDistance_MSA(const uint8_t* src_a, uint32_t HammingDistance_MSA(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
uint32_t HammingDistance_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_C(const uint8_t* src_a, uint32_t SumSquareError_C(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
@ -118,6 +125,9 @@ uint32_t SumSquareError_NEON(const uint8_t* src_a,
uint32_t SumSquareError_MSA(const uint8_t* src_a, uint32_t SumSquareError_MSA(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
uint32_t SumSquareError_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed); uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed); uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);

View File

@ -48,6 +48,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
// These flags are only valid on MIPS processors. // These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x200000; static const int kCpuHasMIPS = 0x200000;
static const int kCpuHasMSA = 0x400000; static const int kCpuHasMSA = 0x400000;
static const int kCpuHasMMI = 0x800000;
// Optional init function. TestCpuFlag does an auto-init. // Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags. // Returns cpu_info flags.

29
include/libyuv/rotate_row.h Normal file → Executable file
View File

@ -60,6 +60,11 @@ extern "C" {
#define HAS_TRANSPOSEUVWX16_MSA #define HAS_TRANSPOSEUVWX16_MSA
#endif #endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_TRANSPOSEWX8_MMI
#define HAS_TRANSPOSEUVWX8_MMI
#endif
void TransposeWxH_C(const uint8_t* src, void TransposeWxH_C(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
@ -87,6 +92,11 @@ void TransposeWx8_SSSE3(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
int dst_stride, int dst_stride,
int width); int width);
void TransposeWx8_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_SSSE3(const uint8_t* src, void TransposeWx8_Fast_SSSE3(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
@ -108,6 +118,11 @@ void TransposeWx8_Any_SSSE3(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
int dst_stride, int dst_stride,
int width); int width);
void TransposeWx8_Any_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
@ -156,6 +171,13 @@ void TransposeUVWx8_NEON(const uint8_t* src,
uint8_t* dst_b, uint8_t* dst_b,
int dst_stride_b, int dst_stride_b,
int width); int width);
void TransposeUVWx8_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_MSA(const uint8_t* src, void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst_a, uint8_t* dst_a,
@ -178,6 +200,13 @@ void TransposeUVWx8_Any_NEON(const uint8_t* src,
uint8_t* dst_b, uint8_t* dst_b,
int dst_stride_b, int dst_stride_b,
int width); int width);
void TransposeUVWx8_Any_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_MSA(const uint8_t* src, void TransposeUVWx16_Any_MSA(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst_a, uint8_t* dst_a,

500
include/libyuv/row.h Normal file → Executable file
View File

@ -483,6 +483,81 @@ extern "C" {
#define HAS_YUY2TOYROW_MSA #define HAS_YUY2TOYROW_MSA
#endif #endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_ABGRTOUVROW_MMI
#define HAS_ABGRTOYROW_MMI
#define HAS_ARGB1555TOARGBROW_MMI
#define HAS_ARGB1555TOUVROW_MMI
#define HAS_ARGB1555TOYROW_MMI
#define HAS_ARGB4444TOARGBROW_MMI
#define HAS_ARGB4444TOUVROW_MMI
#define HAS_ARGB4444TOYROW_MMI
#define HAS_ARGBADDROW_MMI
#define HAS_ARGBATTENUATEROW_MMI
#define HAS_ARGBBLENDROW_MMI
#define HAS_ARGBCOLORMATRIXROW_MMI
#define HAS_ARGBCOPYALPHAROW_MMI
#define HAS_ARGBCOPYYTOALPHAROW_MMI
#define HAS_ARGBEXTRACTALPHAROW_MMI
#define HAS_ARGBGRAYROW_MMI
#define HAS_ARGBMIRRORROW_MMI
#define HAS_ARGBMULTIPLYROW_MMI
#define HAS_ARGBSEPIAROW_MMI
#define HAS_ARGBSHADEROW_MMI
#define HAS_ARGBSHUFFLEROW_MMI
#define HAS_ARGBSUBTRACTROW_MMI
#define HAS_ARGBTOARGB1555ROW_MMI
#define HAS_ARGBTOARGB4444ROW_MMI
#define HAS_ARGBTORAWROW_MMI
#define HAS_ARGBTORGB24ROW_MMI
#define HAS_ARGBTORGB565DITHERROW_MMI
#define HAS_ARGBTORGB565ROW_MMI
#define HAS_ARGBTOUV444ROW_MMI
#define HAS_ARGBTOUVJROW_MMI
#define HAS_ARGBTOUVROW_MMI
#define HAS_ARGBTOYJROW_MMI
#define HAS_ARGBTOYROW_MMI
#define HAS_BGRATOUVROW_MMI
#define HAS_BGRATOYROW_MMI
#define HAS_BLENDPLANEROW_MMI
#define HAS_COMPUTECUMULATIVESUMROW_MMI
#define HAS_CUMULATIVESUMTOAVERAGEROW_MMI
#define HAS_HALFFLOATROW_MMI
#define HAS_I400TOARGBROW_MMI
#define HAS_I422TOUYVYROW_MMI
#define HAS_I422TOYUY2ROW_MMI
#define HAS_INTERPOLATEROW_MMI
#define HAS_J400TOARGBROW_MMI
#define HAS_MERGERGBROW_MMI
#define HAS_MERGEUVROW_MMI
#define HAS_MIRRORROW_MMI
#define HAS_MIRRORUVROW_MMI
#define HAS_RAWTOARGBROW_MMI
#define HAS_RAWTORGB24ROW_MMI
#define HAS_RAWTOUVROW_MMI
#define HAS_RAWTOYROW_MMI
#define HAS_RGB24TOARGBROW_MMI
#define HAS_RGB24TOUVROW_MMI
#define HAS_RGB24TOYROW_MMI
#define HAS_RGB565TOARGBROW_MMI
#define HAS_RGB565TOUVROW_MMI
#define HAS_RGB565TOYROW_MMI
#define HAS_RGBATOUVROW_MMI
#define HAS_RGBATOYROW_MMI
#define HAS_SOBELROW_MMI
#define HAS_SOBELTOPLANEROW_MMI
#define HAS_SOBELXROW_MMI
#define HAS_SOBELXYROW_MMI
#define HAS_SOBELYROW_MMI
#define HAS_SPLITRGBROW_MMI
#define HAS_SPLITUVROW_MMI
#define HAS_UYVYTOUVROW_MMI
#define HAS_UYVYTOYROW_MMI
#define HAS_YUY2TOUV422ROW_MMI
#define HAS_YUY2TOUVROW_MMI
#define HAS_YUY2TOYROW_MMI
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2) #if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var #define SIMD_ALIGNED(var) __declspec(align(32)) var
@ -837,6 +912,8 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToUV444Row_NEON(const uint8_t* src_argb, void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
@ -855,6 +932,15 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void ARGBToUV444Row_MMI(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_MMI(const uint8_t* src_argb0,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_NEON(const uint8_t* src_argb, void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_u,
@ -940,6 +1026,51 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RAWToUVRow_MMI(const uint8_t* src_rgb0,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,
int src_stride_rgb565,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
int src_stride_argb1555,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,
int src_stride_argb4444,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
@ -959,6 +1090,17 @@ void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,
uint8_t* dst_y,
int width);
void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
@ -1001,6 +1143,20 @@ void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void BGRAToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGB4444ToYRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToUVRow_AVX2(const uint8_t* src_argb0, void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb, int src_stride_argb,
@ -1090,6 +1246,15 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void ARGBToUV444Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
@ -1175,6 +1340,51 @@ void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void ARGBToUVJRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RAWToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB565ToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGB1555ToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGB4444ToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_C(const uint8_t* src_rgb0, void ARGBToUVRow_C(const uint8_t* src_rgb0,
int src_stride_rgb, int src_stride_rgb,
uint8_t* dst_u, uint8_t* dst_u,
@ -1254,12 +1464,14 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_SSSE3(const uint8_t* src, void MirrorUVRow_SSSE3(const uint8_t* src,
uint8_t* dst_u, uint8_t* dst_u,
@ -1273,6 +1485,10 @@ void MirrorUVRow_MSA(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void MirrorUVRow_MMI(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void MirrorUVRow_C(const uint8_t* src_uv, void MirrorUVRow_C(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
@ -1282,6 +1498,7 @@ void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -1293,6 +1510,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SplitUVRow_C(const uint8_t* src_uv, void SplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
@ -1314,6 +1532,10 @@ void SplitUVRow_MSA(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void SplitUVRow_MMI(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
@ -1330,6 +1552,10 @@ void SplitUVRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void SplitUVRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void MergeUVRow_C(const uint8_t* src_u, void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
@ -1351,6 +1577,10 @@ void MergeUVRow_MSA(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uv, uint8_t* dst_uv,
int width); int width);
void MergeUVRow_MMI(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width);
void MergeUVRow_Any_SSE2(const uint8_t* y_buf, void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -1367,6 +1597,10 @@ void MergeUVRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void MergeUVRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void SplitRGBRow_C(const uint8_t* src_rgb, void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r, uint8_t* dst_r,
@ -1383,6 +1617,11 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
uint8_t* dst_g, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_b,
int width); int width);
void SplitRGBRow_MMI(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_r, uint8_t* dst_r,
uint8_t* dst_g, uint8_t* dst_g,
@ -1393,6 +1632,11 @@ void SplitRGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_g, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_b,
int width); int width);
void SplitRGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void MergeRGBRow_C(const uint8_t* src_r, void MergeRGBRow_C(const uint8_t* src_r,
const uint8_t* src_g, const uint8_t* src_g,
@ -1409,6 +1653,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b, const uint8_t* src_b,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void MergeRGBRow_MMI(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
uint8_t* dst_rgb,
int width);
void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
@ -1419,6 +1668,11 @@ void MergeRGBRow_Any_NEON(const uint8_t* src_r,
const uint8_t* src_b, const uint8_t* src_b,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void MergeRGBRow_Any_MMI(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
uint8_t* dst_rgb,
int width);
void MergeUVRow_16_C(const uint16_t* src_u, void MergeUVRow_16_C(const uint16_t* src_u,
const uint16_t* src_v, const uint16_t* src_v,
@ -1497,12 +1751,16 @@ void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr, void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr, void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBCopyAlphaRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
@ -1517,6 +1775,9 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
uint8_t* dst_a, uint8_t* dst_a,
int width); int width);
void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
uint8_t* dst_a,
int width);
void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
@ -1529,16 +1790,23 @@ void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr,
void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBExtractAlphaRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBCopyYToAlphaRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void SetRow_C(uint8_t* dst, uint8_t v8, int width); void SetRow_C(uint8_t* dst, uint8_t v8, int width);
void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); void SetRow_MSA(uint8_t* dst, uint8_t v8, int width);
@ -1576,6 +1844,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const uint8_t* shuffler, const uint8_t* shuffler,
int width); int width);
void ARGBShuffleRow_MMI(const uint8_t* src_argb,
uint8_t* dst_argb,
const uint8_t* shuffler,
int width);
void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const uint8_t* param, const uint8_t* param,
@ -1592,6 +1864,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const uint8_t* param, const uint8_t* param,
int width); int width);
void ARGBShuffleRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint8_t* param,
int width);
void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
uint8_t* dst_argb, uint8_t* dst_argb,
@ -1615,28 +1891,40 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
uint8_t* dst_argb,
int width);
void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,
uint8_t* dst_argb,
int width);
void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,
uint8_t* dst_argb,
int width);
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
@ -1687,24 +1975,35 @@ void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr,
void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void RGB24ToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToRGB24Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void RGB565ToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGB1555ToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
@ -1712,6 +2011,9 @@ void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGB4444ToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
@ -1780,6 +2082,20 @@ void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
const uint32_t dither4, const uint32_t dither4,
int width); int width);
void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
uint8_t* dst_rgb,
const uint32_t dither4,
int width);
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@ -1793,6 +2109,7 @@ void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -1804,6 +2121,7 @@ void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void J400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void I444ToARGBRow_C(const uint8_t* src_y, void I444ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
@ -2324,6 +2642,7 @@ void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width); void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width);
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
@ -2334,6 +2653,7 @@ void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// ARGB preattenuated alpha blend. // ARGB preattenuated alpha blend.
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
@ -2348,6 +2668,10 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBBlendRow_MMI(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBBlendRow_C(const uint8_t* src_argb0, void ARGBBlendRow_C(const uint8_t* src_argb0,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
@ -2374,6 +2698,16 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void BlendPlaneRow_MMI(const uint8_t* src0,
const uint8_t* src1,
const uint8_t* alpha,
uint8_t* dst,
int width);
void BlendPlaneRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void BlendPlaneRow_C(const uint8_t* src0, void BlendPlaneRow_C(const uint8_t* src0,
const uint8_t* src1, const uint8_t* src1,
const uint8_t* alpha, const uint8_t* alpha,
@ -2418,6 +2752,14 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBMultiplyRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
// ARGB add images. // ARGB add images.
void ARGBAddRow_C(const uint8_t* src_argb0, void ARGBAddRow_C(const uint8_t* src_argb0,
@ -2456,6 +2798,14 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBAddRow_MMI(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBAddRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
// ARGB subtract images. Same API as Blend, but these require // ARGB subtract images. Same API as Blend, but these require
// pointer and width alignment for SSE2. // pointer and width alignment for SSE2.
@ -2495,6 +2845,14 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBSubtractRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -2584,6 +2942,24 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
const uint32_t param, const uint32_t param,
int width); int width);
void ARGBToRGB24Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToRAWRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToRGB565Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToARGB1555Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToARGB4444Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToRGB565DitherRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint32_t param,
int width);
void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
@ -2770,15 +3146,25 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
int src_stride_yuy2, int src_stride_yuy2,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToUVRow_C(const uint8_t* src_yuy2, void YUY2ToUVRow_C(const uint8_t* src_yuy2,
int src_stride_yuy2, int src_stride_yuy2,
@ -2820,15 +3206,25 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUV422Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
int stride_uyvy, int stride_uyvy,
@ -2870,15 +3266,25 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToUVRow_MSA(const uint8_t* src_uyvy, void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
int src_stride_uyvy, int src_stride_uyvy,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToUVRow_C(const uint8_t* src_uyvy, void UYVYToUVRow_C(const uint8_t* src_uyvy,
@ -2921,15 +3327,25 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUVRow_Any_MMI(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void I422ToYUY2Row_C(const uint8_t* src_y, void I422ToYUY2Row_C(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
@ -3006,21 +3422,41 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width); int width);
void I422ToYUY2Row_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_yuy2,
int width);
void I422ToUYVYRow_MSA(const uint8_t* src_y, void I422ToUYVYRow_MSA(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uyvy, uint8_t* dst_uyvy,
int width); int width);
void I422ToUYVYRow_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uyvy,
int width);
void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void I422ToYUY2Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void I422ToUYVYRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
// Effects related row functions. // Effects related row functions.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
@ -3036,6 +3472,9 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
void ARGBAttenuateRow_MSA(const uint8_t* src_argb, void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
@ -3048,6 +3487,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBAttenuateRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
// Inverse table for unattenuate, shared by C and SSE2. // Inverse table for unattenuate, shared by C and SSE2.
extern const uint32_t fixed_invtbl8[256]; extern const uint32_t fixed_invtbl8[256];
@ -3071,11 +3513,13 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);
void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width);
void ARGBColorMatrixRow_C(const uint8_t* src_argb, void ARGBColorMatrixRow_C(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
@ -3093,6 +3537,10 @@ void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const int8_t* matrix_argb, const int8_t* matrix_argb,
int width); int width);
void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
uint8_t* dst_argb,
const int8_t* matrix_argb,
int width);
void ARGBColorTableRow_C(uint8_t* dst_argb, void ARGBColorTableRow_C(uint8_t* dst_argb,
const uint8_t* table_argb, const uint8_t* table_argb,
@ -3145,6 +3593,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width, int width,
uint32_t value); uint32_t value);
void ARGBShadeRow_MMI(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
uint32_t value);
// Used for blur. // Used for blur.
void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
@ -3158,6 +3610,11 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
const int32_t* previous_cumsum, const int32_t* previous_cumsum,
int width); int width);
void ComputeCumulativeSumRow_MMI(const uint8_t* row,
int32_t* cumsum,
const int32_t* previous_cumsum,
int width);
void CumulativeSumToAverageRow_C(const int32_t* tl, void CumulativeSumToAverageRow_C(const int32_t* tl,
const int32_t* bl, const int32_t* bl,
int w, int w,
@ -3208,6 +3665,11 @@ void InterpolateRow_MSA(uint8_t* dst_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int width, int width,
int source_y_fraction); int source_y_fraction);
void InterpolateRow_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction);
void InterpolateRow_Any_NEON(uint8_t* dst_ptr, void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr, ptrdiff_t src_stride_ptr,
@ -3228,6 +3690,11 @@ void InterpolateRow_Any_MSA(uint8_t* dst_ptr,
ptrdiff_t src_stride_ptr, ptrdiff_t src_stride_ptr,
int width, int width,
int source_y_fraction); int source_y_fraction);
void InterpolateRow_Any_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_16_C(uint16_t* dst_ptr, void InterpolateRow_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr, const uint16_t* src_ptr,
@ -3256,6 +3723,11 @@ void SobelXRow_MSA(const uint8_t* src_y0,
const uint8_t* src_y2, const uint8_t* src_y2,
uint8_t* dst_sobelx, uint8_t* dst_sobelx,
int width); int width);
void SobelXRow_MMI(const uint8_t* src_y0,
const uint8_t* src_y1,
const uint8_t* src_y2,
uint8_t* dst_sobelx,
int width);
void SobelYRow_C(const uint8_t* src_y0, void SobelYRow_C(const uint8_t* src_y0,
const uint8_t* src_y1, const uint8_t* src_y1,
uint8_t* dst_sobely, uint8_t* dst_sobely,
@ -3272,6 +3744,10 @@ void SobelYRow_MSA(const uint8_t* src_y0,
const uint8_t* src_y1, const uint8_t* src_y1,
uint8_t* dst_sobely, uint8_t* dst_sobely,
int width); int width);
void SobelYRow_MMI(const uint8_t* src_y0,
const uint8_t* src_y1,
uint8_t* dst_sobely,
int width);
void SobelRow_C(const uint8_t* src_sobelx, void SobelRow_C(const uint8_t* src_sobelx,
const uint8_t* src_sobely, const uint8_t* src_sobely,
uint8_t* dst_argb, uint8_t* dst_argb,
@ -3288,6 +3764,10 @@ void SobelRow_MSA(const uint8_t* src_sobelx,
const uint8_t* src_sobely, const uint8_t* src_sobely,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void SobelRow_MMI(const uint8_t* src_sobelx,
const uint8_t* src_sobely,
uint8_t* dst_argb,
int width);
void SobelToPlaneRow_C(const uint8_t* src_sobelx, void SobelToPlaneRow_C(const uint8_t* src_sobelx,
const uint8_t* src_sobely, const uint8_t* src_sobely,
uint8_t* dst_y, uint8_t* dst_y,
@ -3304,6 +3784,10 @@ void SobelToPlaneRow_MSA(const uint8_t* src_sobelx,
const uint8_t* src_sobely, const uint8_t* src_sobely,
uint8_t* dst_y, uint8_t* dst_y,
int width); int width);
void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
const uint8_t* src_sobely,
uint8_t* dst_y,
int width);
void SobelXYRow_C(const uint8_t* src_sobelx, void SobelXYRow_C(const uint8_t* src_sobelx,
const uint8_t* src_sobely, const uint8_t* src_sobely,
uint8_t* dst_argb, uint8_t* dst_argb,
@ -3320,6 +3804,10 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx,
const uint8_t* src_sobely, const uint8_t* src_sobely,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void SobelXYRow_MMI(const uint8_t* src_sobelx,
const uint8_t* src_sobely,
uint8_t* dst_argb,
int width);
void SobelRow_Any_SSE2(const uint8_t* y_buf, void SobelRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -3332,6 +3820,10 @@ void SobelRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void SobelRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf, void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -3344,6 +3836,10 @@ void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void SobelToPlaneRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void SobelXYRow_Any_SSE2(const uint8_t* y_buf, void SobelXYRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -3356,6 +3852,10 @@ void SobelXYRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void SobelXYRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBPolynomialRow_C(const uint8_t* src_argb, void ARGBPolynomialRow_C(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,

175
include/libyuv/scale_row.h Normal file → Executable file
View File

@ -110,6 +110,24 @@ extern "C" {
#define HAS_SCALEROWDOWN4_MSA #define HAS_SCALEROWDOWN4_MSA
#endif #endif
// Feature macros for the MMI scale row functions. They are defined only when
// LIBYUV_DISABLE_MMI is not defined and _MIPS_ARCH_LOONGSON3A is defined.
// NOTE(review): _MIPS_ARCH_LOONGSON3A is presumably predefined by the compiler
// when targeting Loongson 3A (e.g. -march=loongson3a) - confirm for the
// toolchains in use.
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
// HAS_FIXEDDIV_MIPS is tested further down in this header to map FixedDiv and
// FixedDiv1 onto FixedDiv_MIPS and FixedDiv1_MIPS.
#define HAS_FIXEDDIV1_MIPS
#define HAS_FIXEDDIV_MIPS
// One macro per MMI row function family declared in this header.
#define HAS_SCALEARGBCOLS_MMI
#define HAS_SCALEARGBCOLSUP2_MMI
#define HAS_SCALEARGBFILTERCOLS_MMI
#define HAS_SCALEARGBROWDOWN2_MMI
#define HAS_SCALEARGBROWDOWNEVEN_MMI
#define HAS_SCALEROWDOWN2_MMI
#define HAS_SCALEROWDOWN4_MMI
#define HAS_SCALEADDROW_MMI
#define HAS_SCALEADDROW_16_MMI
#define HAS_SCALEROWDOWN2_16_MMI
#define HAS_SCALEROWDOWN4_16_MMI
#define HAS_SCALECOLS_MMI
#define HAS_SCALECOLS_16_MMI
#endif
// Scale ARGB vertically with bilinear interpolation. // Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height, void ScalePlaneVertical(int src_height,
int dst_width, int dst_width,
@ -147,12 +165,17 @@ enum FilterMode ScaleFilterReduce(int src_width,
// Divide num by div and return as 16.16 fixed point result. // Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div); int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div); int FixedDiv_X86(int num, int div);
int FixedDiv_MIPS(int num, int div);
// Divide num - 1 by div - 1 and return as 16.16 fixed point result. // Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div); int FixedDiv1_C(int num, int div);
int FixedDiv1_X86(int num, int div); int FixedDiv1_X86(int num, int div);
int FixedDiv1_MIPS(int num, int div);
#ifdef HAS_FIXEDDIV_X86 #ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86 #define FixedDiv FixedDiv_X86
#define FixedDiv1 FixedDiv1_X86 #define FixedDiv1 FixedDiv1_X86
#elif defined HAS_FIXEDDIV_MIPS
#define FixedDiv FixedDiv_MIPS
#define FixedDiv1 FixedDiv1_MIPS
#else #else
#define FixedDiv FixedDiv_C #define FixedDiv FixedDiv_C
#define FixedDiv1 FixedDiv1_C #define FixedDiv1 FixedDiv1_C
@ -569,6 +592,26 @@ void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr,
int dst_width, int dst_width,
int x, int x,
int dx); int dx);
void ScaleARGBFilterCols_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_Any_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// ARGB Row functions // ARGB Row functions
void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
@ -607,6 +650,18 @@ void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_argb, uint8_t* dst_argb,
int dst_width); int dst_width);
void ScaleARGBRowDown2_MMI(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr, void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -643,7 +698,18 @@ void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width); int dst_width);
void ScaleARGBRowDown2_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Linear_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Box_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int src_stepx, int src_stepx,
@ -674,6 +740,16 @@ void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
int src_stepx, int src_stepx,
uint8_t* dst_argb, uint8_t* dst_argb,
int dst_width); int dst_width);
// MMI variant of ScaleARGBRowDownEven.
// src_stepx is declared as plain int so this prototype agrees with
// ScaleARGBRowDownEvenBox_MMI and the MSA variants declared alongside it.
// NOTE(review): assumes int32_t is a typedef of int on the targets that build
// the MMI sources, so the existing definition stays compatible - confirm
// against the definition in source/scale_mmi.cc.
void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              int src_stepx,
                              uint8_t* dst_argb,
                              int dst_width);
void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int src_stepx, int src_stepx,
@ -704,6 +780,16 @@ void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
int src_stepx, int src_stepx,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width); int dst_width);
// "Any" wrapper counterpart of the MMI ScaleARGBRowDownEven row function.
// src_stepx is declared as plain int so this prototype agrees with
// ScaleARGBRowDownEvenBox_Any_MMI and the MSA "Any" variants declared
// alongside it.
// NOTE(review): assumes int32_t is a typedef of int on the targets that build
// the MMI sources, so the existing definition stays compatible - confirm
// against the definition that generates this wrapper.
void ScaleARGBRowDownEven_Any_MMI(const uint8_t* src_ptr,
                                  ptrdiff_t src_stride,
                                  int src_stepx,
                                  uint8_t* dst_ptr,
                                  int dst_width);
void ScaleARGBRowDownEvenBox_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
// ScaleRowDown2Box also used by planar functions // ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation. // NEON downscalers with interpolation.
@ -936,6 +1022,93 @@ void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width); int dst_width);
void ScaleRowDown2_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_Odd_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown4Box_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_16_MMI(const uint16_t* src_ptr,
uint32_t* dst_ptr,
int src_width);
void ScaleColsUp2_MMI(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleColsUp2_16_MMI(uint16_t* dst_ptr,
const uint16_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleRowDown2_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleAddRow_Any_MMI(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv

View File

@ -13,8 +13,11 @@ import("//build/config/mips.gni")
declare_args() { declare_args() {
libyuv_include_tests = !build_with_chromium libyuv_include_tests = !build_with_chromium
libyuv_disable_jpeg = false libyuv_disable_jpeg = false
libyuv_use_neon = (current_cpu == "arm64" || libyuv_use_neon =
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))) current_cpu == "arm64" ||
libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") && (current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))
mips_use_msa libyuv_use_msa =
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa
libyuv_use_mmi =
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi
} }

View File

@ -27,8 +27,10 @@
# Link-Time Optimizations. # Link-Time Optimizations.
'use_lto%': 0, 'use_lto%': 0,
'mips_msa%': 0, # Default to msa off. 'mips_msa%': 0, # Default to msa off.
'mips_mmi%': 0, # Default to mmi off.
'build_neon': 0, 'build_neon': 0,
'build_msa': 0, 'build_msa': 0,
'build_mmi': 0,
'conditions': [ 'conditions': [
['(target_arch == "armv7" or target_arch == "armv7s" or \ ['(target_arch == "armv7" or target_arch == "armv7s" or \
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\ (target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
@ -40,6 +42,11 @@
{ {
'build_msa': 1, 'build_msa': 1,
}], }],
['(target_arch == "mipsel" or target_arch == "mips64el")\
and (mips_mmi == 1)',
{
'build_mmi': 1,
}],
], ],
}, },
@ -92,6 +99,11 @@
'LIBYUV_MSA', 'LIBYUV_MSA',
], ],
}], }],
['build_mmi != 0', {
'defines': [
'LIBYUV_MMI',
],
}],
['OS != "ios" and libyuv_disable_jpeg != 1', { ['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [ 'defines': [
'HAVE_JPEG' 'HAVE_JPEG'

View File

@ -36,6 +36,7 @@
'source/compare_common.cc', 'source/compare_common.cc',
'source/compare_gcc.cc', 'source/compare_gcc.cc',
'source/compare_msa.cc', 'source/compare_msa.cc',
'source/compare_mmi.cc',
'source/compare_neon.cc', 'source/compare_neon.cc',
'source/compare_neon64.cc', 'source/compare_neon64.cc',
'source/compare_win.cc', 'source/compare_win.cc',
@ -56,6 +57,7 @@
'source/rotate_common.cc', 'source/rotate_common.cc',
'source/rotate_gcc.cc', 'source/rotate_gcc.cc',
'source/rotate_msa.cc', 'source/rotate_msa.cc',
'source/rotate_mmi.cc',
'source/rotate_neon.cc', 'source/rotate_neon.cc',
'source/rotate_neon64.cc', 'source/rotate_neon64.cc',
'source/rotate_win.cc', 'source/rotate_win.cc',
@ -63,6 +65,7 @@
'source/row_common.cc', 'source/row_common.cc',
'source/row_gcc.cc', 'source/row_gcc.cc',
'source/row_msa.cc', 'source/row_msa.cc',
'source/row_mmi.cc',
'source/row_neon.cc', 'source/row_neon.cc',
'source/row_neon64.cc', 'source/row_neon64.cc',
'source/row_win.cc', 'source/row_win.cc',
@ -72,6 +75,7 @@
'source/scale_common.cc', 'source/scale_common.cc',
'source/scale_gcc.cc', 'source/scale_gcc.cc',
'source/scale_msa.cc', 'source/scale_msa.cc',
'source/scale_mmi.cc',
'source/scale_neon.cc', 'source/scale_neon.cc',
'source/scale_neon64.cc', 'source/scale_neon64.cc',
'source/scale_win.cc', 'source/scale_win.cc',

View File

@ -95,6 +95,12 @@
'LIBYUV_MSA' 'LIBYUV_MSA'
], ],
}], }],
[ '(target_arch == "mipsel" or target_arch == "mips64el") \
and (mips_mmi == 1)', {
'defines': [
'LIBYUV_MMI'
],
}],
], # conditions ], # conditions
'defines': [ 'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU. # Enable the following 3 macros to turn off assembly for specified CPU.

11
source/compare.cc Normal file → Executable file
View File

@ -154,6 +154,12 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a,
HammingDistance = HammingDistance_MSA; HammingDistance = HammingDistance_MSA;
} }
#endif #endif
#if defined(HAS_HAMMINGDISTANCE_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
HammingDistance = HammingDistance_MMI;
}
#endif
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for reduction(+ : diff) #pragma omp parallel for reduction(+ : diff)
#endif #endif
@ -210,6 +216,11 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a,
SumSquareError = SumSquareError_MSA; SumSquareError = SumSquareError_MSA;
} }
#endif #endif
#if defined(HAS_SUMSQUAREERROR_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SumSquareError = SumSquareError_MMI;
}
#endif
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for reduction(+ : sse) #pragma omp parallel for reduction(+ : sse)
#endif #endif

121
source/compare_mmi.cc Normal file
View File

@ -0,0 +1,121 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
// Hakmem method for hamming distance.
uint32_t HammingDistance_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count) {
uint32_t diff = 0u;
uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
uint64_t c1 = 0x5555555555555555;
uint64_t c2 = 0x3333333333333333;
uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
uint32_t c4 = 0x01010101;
uint64_t s1 = 1, s2 = 2, s3 = 4;
__asm__ volatile(
"1: \n\t"
"ldc1 %[ta], 0(%[src_a]) \n\t"
"ldc1 %[tb], 0(%[src_b]) \n\t"
"xor %[temp], %[ta], %[tb] \n\t"
"psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
"and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
"psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
"and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
"psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
"and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
"paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
"psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
"paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
"and %[temp1], %[temp1], %[c3] \n\t" //&c3
"dmfc1 $t0, %[temp1] \n\t"
"dsrl32 $t0, $t0, 0 \n\t "
"mul $t0, $t0, %[c4] \n\t"
"dsrl $t0, $t0, 24 \n\t"
"dadd %[diff], %[diff], $t0 \n\t"
"dmfc1 $t0, %[temp1] \n\t"
"mul $t0, $t0, %[c4] \n\t"
"dsrl $t0, $t0, 24 \n\t"
"dadd %[diff], %[diff], $t0 \n\t"
"daddiu %[src_a], %[src_a], 8 \n\t"
"daddiu %[src_b], %[src_b], 8 \n\t"
"addiu %[count], %[count], -8 \n\t"
"bgtz %[count], 1b \n\t"
"nop \n\t"
: [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
[count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
[temp1] "+f"(temp1)
: [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
[s2] "f"(s2), [s3] "f"(s3)
: "memory");
return diff;
}
uint32_t SumSquareError_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count) {
uint32_t sse = 0u;
uint32_t sse_hi = 0u, sse_lo = 0u;
uint64_t src1, src2;
uint64_t diff, diff_hi, diff_lo;
uint64_t sse_sum, sse_tmp;
const uint64_t mask = 0x0ULL;
__asm__ volatile(
"xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
"1: \n\t"
"ldc1 %[src1], 0x00(%[src_a]) \n\t"
"ldc1 %[src2], 0x00(%[src_b]) \n\t"
"pasubub %[diff], %[src1], %[src2] \n\t"
"punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
"punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
"pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
"paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
"pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
"paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
"daddiu %[src_a], %[src_a], 0x08 \n\t"
"daddiu %[src_b], %[src_b], 0x08 \n\t"
"daddiu %[count], %[count], -0x08 \n\t"
"bnez %[count], 1b \n\t"
"mfc1 %[sse_lo], %[sse_sum] \n\t"
"mfhc1 %[sse_hi], %[sse_sum] \n\t"
"daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
: [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
[src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
[sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
[sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
: [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
[mask] "f"(mask)
: "memory");
return sse;
}
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -502,6 +502,18 @@ int YUY2ToI420(const uint8_t* src_yuy2,
} }
} }
#endif #endif
#if defined(HAS_YUY2TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
YUY2ToYRow = YUY2ToYRow_Any_MMI;
YUY2ToUVRow = YUY2ToUVRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
YUY2ToYRow = YUY2ToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
YUY2ToUVRow = YUY2ToUVRow_MMI;
}
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
@ -583,6 +595,16 @@ int UYVYToI420(const uint8_t* src_uyvy,
} }
} }
#endif #endif
#if defined(HAS_UYVYTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
UYVYToYRow = UYVYToYRow_Any_MMI;
UYVYToUVRow = UYVYToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_MMI;
UYVYToUVRow = UYVYToUVRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
@ -679,6 +701,22 @@ int ARGBToI420(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
@ -765,6 +803,22 @@ int BGRAToI420(const uint8_t* src_bgra,
} }
} }
#endif #endif
#if defined(HAS_BGRATOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
BGRAToYRow = BGRAToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_MMI;
}
}
#endif
#if defined(HAS_BGRATOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
BGRAToUVRow = BGRAToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
@ -851,6 +905,22 @@ int ABGRToI420(const uint8_t* src_abgr,
} }
} }
#endif #endif
#if defined(HAS_ABGRTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToYRow = ABGRToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_MMI;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToUVRow = ABGRToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@ -937,6 +1007,22 @@ int RGBAToI420(const uint8_t* src_rgba,
} }
} }
#endif #endif
#if defined(HAS_RGBATOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGBAToYRow = RGBAToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_MMI;
}
}
#endif
#if defined(HAS_RGBATOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGBAToUVRow = RGBAToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@ -967,7 +1053,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
int width, int width,
int height) { int height) {
int y; int y;
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || defined(HAS_RGB24TOYROW_MMI))
void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVRow_C; RGB24ToUVRow_C;
@ -1013,6 +1099,17 @@ int RGB24ToI420(const uint8_t* src_rgb24,
RGB24ToUVRow = RGB24ToUVRow_MSA; RGB24ToUVRow = RGB24ToUVRow_MSA;
} }
} }
#elif defined(HAS_RGB24TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToUVRow = RGB24ToUVRow_Any_MMI;
RGB24ToYRow = RGB24ToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RGB24ToYRow = RGB24ToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
RGB24ToUVRow = RGB24ToUVRow_MMI;
}
}
}
// Other platforms do intermediate conversion from RGB24 to ARGB. // Other platforms do intermediate conversion from RGB24 to ARGB.
#else #else
#if defined(HAS_RGB24TOARGBROW_SSSE3) #if defined(HAS_RGB24TOARGBROW_SSSE3)
@ -1046,14 +1143,14 @@ int RGB24ToI420(const uint8_t* src_rgb24,
#endif #endif
{ {
#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) #if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || defined(HAS_RGB24TOYROW_MMI))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || defined(HAS_RGB24TOYROW_MMI))
RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYRow(src_rgb24, dst_y, width); RGB24ToYRow(src_rgb24, dst_y, width);
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
@ -1070,7 +1167,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || defined(HAS_RGB24TOYROW_MMI))
RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYRow(src_rgb24, dst_y, width); RGB24ToYRow(src_rgb24, dst_y, width);
#else #else
@ -1079,7 +1176,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
#endif #endif
} }
#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) #if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || defined(HAS_RGB24TOYROW_MMI))
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif #endif
} }
@ -1099,7 +1196,7 @@ int RAWToI420(const uint8_t* src_raw,
int width, int width,
int height) { int height) {
int y; int y;
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI))
void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
uint8_t* dst_v, int width) = RAWToUVRow_C; uint8_t* dst_v, int width) = RAWToUVRow_C;
void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
@ -1144,6 +1241,17 @@ int RAWToI420(const uint8_t* src_raw,
RAWToUVRow = RAWToUVRow_MSA; RAWToUVRow = RAWToUVRow_MSA;
} }
} }
#elif defined(HAS_RAWTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToUVRow = RAWToUVRow_Any_MMI;
RAWToYRow = RAWToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RAWToYRow = RAWToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
RAWToUVRow = RAWToUVRow_MMI;
}
}
}
// Other platforms do intermediate conversion from RAW to ARGB. // Other platforms do intermediate conversion from RAW to ARGB.
#else #else
#if defined(HAS_RAWTOARGBROW_SSSE3) #if defined(HAS_RAWTOARGBROW_SSSE3)
@ -1177,14 +1285,14 @@ int RAWToI420(const uint8_t* src_raw,
#endif #endif
{ {
#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) #if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI))
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw, dst_y, width);
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
@ -1201,7 +1309,7 @@ int RAWToI420(const uint8_t* src_raw,
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI))
RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw, dst_y, width);
#else #else
@ -1210,7 +1318,7 @@ int RAWToI420(const uint8_t* src_raw,
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
#endif #endif
} }
#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) #if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || defined(HAS_RAWTOYROW_MMI))
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif #endif
} }
@ -1230,7 +1338,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
int width, int width,
int height) { int height) {
int y; int y;
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_MMI))
void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB565ToUVRow_C; RGB565ToUVRow_C;
@ -1276,6 +1384,17 @@ int RGB565ToI420(const uint8_t* src_rgb565,
RGB565ToUVRow = RGB565ToUVRow_MSA; RGB565ToUVRow = RGB565ToUVRow_MSA;
} }
} }
#elif defined(HAS_RGB565TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB565ToUVRow = RGB565ToUVRow_Any_MMI;
RGB565ToYRow = RGB565ToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RGB565ToYRow = RGB565ToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
RGB565ToUVRow = RGB565ToUVRow_MMI;
}
}
}
// Other platforms do intermediate conversion from RGB565 to ARGB. // Other platforms do intermediate conversion from RGB565 to ARGB.
#else #else
#if defined(HAS_RGB565TOARGBROW_SSE2) #if defined(HAS_RGB565TOARGBROW_SSE2)
@ -1316,13 +1435,13 @@ int RGB565ToI420(const uint8_t* src_rgb565,
#endif #endif
#endif #endif
{ {
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_MMI))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_MMI))
RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565, dst_y, width);
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
@ -1339,7 +1458,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_MMI))
RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565, dst_y, width);
#else #else
@ -1348,7 +1467,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
#endif #endif
} }
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_MMI))
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif #endif
} }
@ -1368,7 +1487,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
int width, int width,
int height) { int height) {
int y; int y;
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_MMI))
void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB1555ToUVRow_C; ARGB1555ToUVRow_C;
@ -1415,6 +1534,17 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
ARGB1555ToUVRow = ARGB1555ToUVRow_MSA; ARGB1555ToUVRow = ARGB1555ToUVRow_MSA;
} }
} }
#elif defined(HAS_ARGB1555TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MMI;
ARGB1555ToYRow = ARGB1555ToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToYRow = ARGB1555ToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
ARGB1555ToUVRow = ARGB1555ToUVRow_MMI;
}
}
}
// Other platforms do intermediate conversion from ARGB1555 to ARGB. // Other platforms do intermediate conversion from ARGB1555 to ARGB.
#else #else
#if defined(HAS_ARGB1555TOARGBROW_SSE2) #if defined(HAS_ARGB1555TOARGBROW_SSE2)
@ -1455,14 +1585,14 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
#endif #endif
#endif #endif
{ {
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_MMI))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_MMI))
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555, dst_y, width);
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
@ -1481,7 +1611,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_MMI))
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555, dst_y, width);
#else #else
@ -1490,7 +1620,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
#endif #endif
} }
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_MMI))
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif #endif
} }
@ -1510,7 +1640,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
int width, int width,
int height) { int height) {
int y; int y;
#if defined(HAS_ARGB4444TOYROW_NEON) #if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI))
void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB4444ToUVRow_C; ARGB4444ToUVRow_C;
@ -1548,6 +1678,17 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
} }
} }
} }
#elif defined(HAS_ARGB4444TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGB4444ToUVRow = ARGB4444ToUVRow_Any_MMI;
ARGB4444ToYRow = ARGB4444ToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToYRow = ARGB4444ToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
ARGB4444ToUVRow = ARGB4444ToUVRow_MMI;
}
}
}
// Other platforms do intermediate conversion from ARGB4444 to ARGB. // Other platforms do intermediate conversion from ARGB4444 to ARGB.
#else #else
#if defined(HAS_ARGB4444TOARGBROW_SSE2) #if defined(HAS_ARGB4444TOARGBROW_SSE2)
@ -1606,17 +1747,29 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
}
#endif
#endif #endif
{ {
#if !defined(HAS_ARGB4444TOYROW_NEON) #if !(defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_ARGB4444TOYROW_NEON) #if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI))
ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
ARGB4444ToYRow(src_argb4444, dst_y, width); ARGB4444ToYRow(src_argb4444, dst_y, width);
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
@ -1635,7 +1788,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if defined(HAS_ARGB4444TOYROW_NEON) #if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI))
ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
ARGB4444ToYRow(src_argb4444, dst_y, width); ARGB4444ToYRow(src_argb4444, dst_y, width);
#else #else
@ -1644,7 +1797,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
#endif #endif
} }
#if !defined(HAS_ARGB4444TOYROW_NEON) #if !(defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI))
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif #endif
} }

64
source/convert_argb.cc Normal file → Executable file
View File

@ -885,6 +885,14 @@ static int I420AlphaToARGBMatrix(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBAttenuateRow = ARGBAttenuateRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@ -1004,6 +1012,14 @@ int I400ToARGB(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_I400TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I400ToARGBRow = I400ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I400ToARGBRow = I400ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I400ToARGBRow(src_y, dst_argb, width); I400ToARGBRow(src_y, dst_argb, width);
@ -1070,6 +1086,14 @@ int J400ToARGB(const uint8_t* src_y,
J400ToARGBRow = J400ToARGBRow_MSA; J400ToARGBRow = J400ToARGBRow_MSA;
} }
} }
#endif
#if defined(HAS_J400TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
J400ToARGBRow = J400ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
J400ToARGBRow = J400ToARGBRow_MMI;
}
}
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
J400ToARGBRow(src_y, dst_argb, width); J400ToARGBRow(src_y, dst_argb, width);
@ -1201,6 +1225,14 @@ int RGB24ToARGB(const uint8_t* src_rgb24,
} }
} }
#endif #endif
#if defined(HAS_RGB24TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RGB24ToARGBRow = RGB24ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width); RGB24ToARGBRow(src_rgb24, dst_argb, width);
@ -1260,6 +1292,14 @@ int RAWToARGB(const uint8_t* src_raw,
} }
} }
#endif #endif
#if defined(HAS_RAWTOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToARGBRow = RAWToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RAWToARGBRow = RAWToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width); RAWToARGBRow(src_raw, dst_argb, width);
@ -1327,6 +1367,14 @@ int RGB565ToARGB(const uint8_t* src_rgb565,
} }
} }
#endif #endif
#if defined(HAS_RGB565TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RGB565ToARGBRow = RGB565ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width); RGB565ToARGBRow(src_rgb565, dst_argb, width);
@ -1394,6 +1442,14 @@ int ARGB1555ToARGB(const uint8_t* src_argb1555,
} }
} }
#endif #endif
#if defined(HAS_ARGB1555TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width); ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
@ -1461,6 +1517,14 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444,
} }
} }
#endif #endif
#if defined(HAS_ARGB4444TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGB4444ToARGBRow(src_argb4444, dst_argb, width); ARGB4444ToARGBRow(src_argb4444, dst_argb, width);

View File

@ -302,6 +302,14 @@ int I420ToYUY2(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOYUY2ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToYUY2Row = I422ToYUY2Row_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
@ -381,6 +389,14 @@ int I422ToUYVY(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
@ -448,6 +464,14 @@ int I420ToUYVY(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);

226
source/convert_from_argb.cc Normal file → Executable file
View File

@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUV444ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUV444Row = ARGBToUV444Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_MMI;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
@ -108,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToUV444Row(src_argb, dst_u, dst_v, width); ARGBToUV444Row(src_argb, dst_u, dst_v, width);
@ -208,6 +224,23 @@ int ARGBToI422(const uint8_t* src_argb,
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
@ -298,6 +331,22 @@ int ARGBToNV12(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
@ -329,6 +378,14 @@ int ARGBToNV12(const uint8_t* src_argb,
MergeUVRow_ = MergeUVRow_MSA; MergeUVRow_ = MergeUVRow_MSA;
} }
} }
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
@ -434,6 +491,23 @@ int ARGBToNV21(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
@ -465,6 +539,14 @@ int ARGBToNV21(const uint8_t* src_argb,
MergeUVRow_ = MergeUVRow_MSA; MergeUVRow_ = MergeUVRow_MSA;
} }
} }
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
@ -575,6 +657,22 @@ int ARGBToYUY2(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_SSE2) #if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
@ -607,6 +705,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_I422TOYUY2ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToYUY2Row = I422ToYUY2Row_MMI;
}
}
#endif
{ {
// Allocate a rows of yuv. // Allocate a rows of yuv.
@ -712,6 +818,22 @@ int ARGBToUYVY(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_SSE2) #if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
@ -744,6 +866,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
{ {
// Allocate a rows of yuv. // Allocate a rows of yuv.
@ -821,6 +951,14 @@ int ARGBToI400(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
@ -911,6 +1049,14 @@ int ARGBToRGB24(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTORGB24ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB24Row = ARGBToRGB24Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToRGB24Row(src_argb, dst_rgb24, width); ARGBToRGB24Row(src_argb, dst_rgb24, width);
@ -977,6 +1123,14 @@ int ARGBToRAW(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTORAWROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRAWRow = ARGBToRAWRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRAWRow = ARGBToRAWRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToRAWRow(src_argb, dst_raw, width); ARGBToRAWRow(src_argb, dst_raw, width);
@ -1047,6 +1201,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTORGB565DITHERROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565, ARGBToRGB565DitherRow(src_argb, dst_rgb565,
@ -1116,6 +1278,14 @@ int ARGBToRGB565(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTORGB565ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToRGB565Row(src_argb, dst_rgb565, width); ARGBToRGB565Row(src_argb, dst_rgb565, width);
@ -1182,6 +1352,14 @@ int ARGBToARGB1555(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOARGB1555ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToARGB1555Row(src_argb, dst_argb1555, width); ARGBToARGB1555Row(src_argb, dst_argb1555, width);
@ -1248,6 +1426,14 @@ int ARGBToARGB4444(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOARGB4444ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToARGB4444Row(src_argb, dst_argb4444, width); ARGBToARGB4444Row(src_argb, dst_argb4444, width);
@ -1424,6 +1610,14 @@ int ARGBToJ420(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_MSA) #if defined(HAS_ARGBTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MSA; ARGBToUVJRow = ARGBToUVJRow_Any_MSA;
@ -1432,6 +1626,14 @@ int ARGBToJ420(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
@ -1525,6 +1727,14 @@ int ARGBToJ422(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_MSA) #if defined(HAS_ARGBTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MSA; ARGBToUVJRow = ARGBToUVJRow_Any_MSA;
@ -1533,6 +1743,14 @@ int ARGBToJ422(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
@ -1602,6 +1820,14 @@ int ARGBToJ400(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToYJRow(src_argb, dst_yj, width); ARGBToYJRow(src_argb, dst_yj, width);

View File

@ -173,6 +173,9 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
if (strcmp(ase, " msa") == 0) { if (strcmp(ase, " msa") == 0) {
return kCpuHasMSA; return kCpuHasMSA;
} }
if (strcmp(ase, " mmi") == 0) {
return kCpuHasMMI;
}
return 0; return 0;
} }
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
@ -185,6 +188,15 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
} }
return 0; return 0;
} }
} else if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
  // A "cpu model" line that names a Loongson-3 part is taken as proof of MMI.
  // Stop scanning only when MMI is the feature being queried. For any other
  // ASE keep reading, so a later line of the file can still be matched
  // instead of reporting "unsupported" as soon as the model line is seen.
  if (strstr(cpuinfo_line, "Loongson-3") && strcmp(ase, " mmi") == 0) {
    fclose(f);
    return kCpuHasMMI;
  }
} }
} }
fclose(f); fclose(f);
@ -232,6 +244,8 @@ static SAFEBUFFERS int GetCpuFlags(void) {
#if defined(__mips__) && defined(__linux__) #if defined(__mips__) && defined(__linux__)
#if defined(__mips_msa) #if defined(__mips_msa)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
#elif defined(_MIPS_ARCH_LOONGSON3A)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " mmi");
#endif #endif
cpu_info |= kCpuHasMIPS; cpu_info |= kCpuHasMIPS;
#endif #endif

296
source/planar_functions.cc Normal file → Executable file
View File

@ -410,6 +410,14 @@ void SplitUVPlane(const uint8_t* src_uv,
} }
} }
#endif #endif
#if defined(HAS_SPLITUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SplitUVRow = SplitUVRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
SplitUVRow = SplitUVRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
// Copy a row of UV. // Copy a row of UV.
@ -478,6 +486,14 @@ void MergeUVPlane(const uint8_t* src_u,
} }
} }
#endif #endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow = MergeUVRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
MergeUVRow = MergeUVRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of UV. // Merge a row of U and V into a row of UV.
@ -537,6 +553,14 @@ void SplitRGBPlane(const uint8_t* src_rgb,
} }
} }
#endif #endif
#if defined(HAS_SPLITRGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SplitRGBRow = SplitRGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
SplitRGBRow = SplitRGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
// Copy a row of RGB. // Copy a row of RGB.
@ -593,6 +617,14 @@ void MergeRGBPlane(const uint8_t* src_r,
} }
} }
#endif #endif
#if defined(HAS_MERGERGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeRGBRow = MergeRGBRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
MergeRGBRow = MergeRGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of RGB. // Merge a row of U and V into a row of RGB.
@ -651,6 +683,14 @@ void MirrorPlane(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_MIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MirrorRow = MirrorRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
MirrorRow = MirrorRow_MMI;
}
}
#endif
// Mirror plane // Mirror plane
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -734,6 +774,16 @@ int YUY2ToI422(const uint8_t* src_yuy2,
} }
} }
#endif #endif
#if defined(HAS_YUY2TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
YUY2ToYRow = YUY2ToYRow_Any_MMI;
YUY2ToUV422Row = YUY2ToUV422Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
YUY2ToYRow = YUY2ToYRow_MMI;
YUY2ToUV422Row = YUY2ToUV422Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
@ -820,6 +870,16 @@ int UYVYToI422(const uint8_t* src_uyvy,
} }
} }
#endif #endif
#if defined(HAS_UYVYTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
UYVYToYRow = UYVYToYRow_Any_MMI;
UYVYToUV422Row = UYVYToUV422Row_Any_MMI;
if (IS_ALIGNED(width, 16)) {
UYVYToYRow = UYVYToYRow_MMI;
UYVYToUV422Row = UYVYToUV422Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
@ -890,6 +950,14 @@ int YUY2ToY(const uint8_t* src_yuy2,
} }
} }
#endif #endif
#if defined(HAS_YUY2TOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
YUY2ToYRow = YUY2ToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
YUY2ToYRow = YUY2ToYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
YUY2ToYRow(src_yuy2, dst_y, width); YUY2ToYRow(src_yuy2, dst_y, width);
@ -1015,6 +1083,14 @@ int ARGBMirror(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBMIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBMirrorRow = ARGBMirrorRow_MMI;
}
}
#endif
// Mirror plane // Mirror plane
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -1047,6 +1123,11 @@ ARGBBlendRow GetARGBBlend() {
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBBlendRow = ARGBBlendRow_MSA; ARGBBlendRow = ARGBBlendRow_MSA;
} }
#endif
#if defined(HAS_ARGBBLENDROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBBlendRow = ARGBBlendRow_MMI;
}
#endif #endif
return ARGBBlendRow; return ARGBBlendRow;
} }
@ -1140,6 +1221,14 @@ int BlendPlane(const uint8_t* src_y0,
} }
} }
#endif #endif
#if defined(HAS_BLENDPLANEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
BlendPlaneRow = BlendPlaneRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
BlendPlaneRow = BlendPlaneRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width); BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
@ -1215,6 +1304,14 @@ int I420Blend(const uint8_t* src_y0,
BlendPlaneRow = BlendPlaneRow_AVX2; BlendPlaneRow = BlendPlaneRow_AVX2;
} }
} }
#endif
#if defined(HAS_BLENDPLANEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
BlendPlaneRow = BlendPlaneRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
BlendPlaneRow = BlendPlaneRow_MMI;
}
}
#endif #endif
if (!IS_ALIGNED(width, 2)) { if (!IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_C; ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
@ -1252,6 +1349,17 @@ int I420Blend(const uint8_t* src_y0,
} }
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN2_MMI)
// MMI box-filter selection, most general kernel first: the _Odd_ variant is
// the default, an even width upgrades to the _Any_ wrapper, and a halfwidth
// that is a multiple of 8 upgrades again to the plain MMI kernel.
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_MMI;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
ScaleRowDown2 = ScaleRowDown2Box_MMI;
}
}
}
#endif
// Row buffer for intermediate alpha pixels. // Row buffer for intermediate alpha pixels.
align_buffer_64(halfalpha, halfwidth); align_buffer_64(halfalpha, halfwidth);
@ -1337,6 +1445,14 @@ int ARGBMultiply(const uint8_t* src_argb0,
} }
} }
#endif #endif
#if defined(HAS_ARGBMULTIPLYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBMultiplyRow = ARGBMultiplyRow_MMI;
}
}
#endif
// Multiply plane // Multiply plane
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -1414,6 +1530,14 @@ int ARGBAdd(const uint8_t* src_argb0,
} }
} }
#endif #endif
#if defined(HAS_ARGBADDROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBAddRow = ARGBAddRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBAddRow = ARGBAddRow_MMI;
}
}
#endif
// Add plane // Add plane
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -1486,6 +1610,14 @@ int ARGBSubtract(const uint8_t* src_argb0,
} }
} }
#endif #endif
#if defined(HAS_ARGBSUBTRACTROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBSubtractRow = ARGBSubtractRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBSubtractRow = ARGBSubtractRow_MMI;
}
}
#endif
// Subtract plane // Subtract plane
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -1718,6 +1850,14 @@ int RAWToRGB24(const uint8_t* src_raw,
} }
} }
#endif #endif
#if defined(HAS_RAWTORGB24ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToRGB24Row = RAWToRGB24Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RAWToRGB24Row = RAWToRGB24Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RAWToRGB24Row(src_raw, dst_rgb24, width); RAWToRGB24Row(src_raw, dst_rgb24, width);
@ -1939,6 +2079,14 @@ int ARGBAttenuate(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBAttenuateRow = ARGBAttenuateRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBAttenuateRow(src_argb, dst_argb, width); ARGBAttenuateRow(src_argb, dst_argb, width);
@ -2039,6 +2187,11 @@ int ARGBGrayTo(const uint8_t* src_argb,
ARGBGrayRow = ARGBGrayRow_MSA; ARGBGrayRow = ARGBGrayRow_MSA;
} }
#endif #endif
#if defined(HAS_ARGBGRAYROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
ARGBGrayRow = ARGBGrayRow_MMI;
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBGrayRow(src_argb, dst_argb, width); ARGBGrayRow(src_argb, dst_argb, width);
@ -2084,6 +2237,11 @@ int ARGBGray(uint8_t* dst_argb,
ARGBGrayRow = ARGBGrayRow_MSA; ARGBGrayRow = ARGBGrayRow_MSA;
} }
#endif #endif
#if defined(HAS_ARGBGRAYROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
ARGBGrayRow = ARGBGrayRow_MMI;
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBGrayRow(dst, dst, width); ARGBGrayRow(dst, dst, width);
@ -2127,6 +2285,11 @@ int ARGBSepia(uint8_t* dst_argb,
ARGBSepiaRow = ARGBSepiaRow_MSA; ARGBSepiaRow = ARGBSepiaRow_MSA;
} }
#endif #endif
#if defined(HAS_ARGBSEPIAROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
ARGBSepiaRow = ARGBSepiaRow_MMI;
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBSepiaRow(dst, width); ARGBSepiaRow(dst, width);
@ -2177,6 +2340,11 @@ int ARGBColorMatrix(const uint8_t* src_argb,
if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
ARGBColorMatrixRow = ARGBColorMatrixRow_MSA; ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
} }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
ARGBColorMatrixRow = ARGBColorMatrixRow_MMI;
}
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
@ -2372,6 +2540,12 @@ int ARGBComputeCumulativeSum(const uint8_t* src_argb,
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
} }
#endif #endif
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI)
// Use the MMI cumulative-sum kernel whenever the CPU reports MMI; no width
// alignment is required by this dispatch.
// NOTE(review): the guard macro is named after CumulativeSumToAverageRow, but
// the function installed is ComputeCumulativeSumRow_MMI. Confirm the header
// defines this macro whenever ComputeCumulativeSumRow_MMI is available.
if (TestCpuFlag(kCpuHasMMI)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI;
}
#endif
memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
@ -2429,6 +2603,11 @@ int ARGBBlur(const uint8_t* src_argb,
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
} }
#endif
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_MMI)
// Only the cumulative-sum pass gets an MMI kernel. Unlike the SSE2 branch
// above, CumulativeSumToAverageRow is not overridden and keeps whatever was
// selected before this point.
// NOTE(review): the guard name refers to CumulativeSumToAverageRow although
// that pointer is not assigned here — confirm this is intentional.
if (TestCpuFlag(kCpuHasMMI)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_MMI;
}
#endif #endif
// Compute enough CumulativeSum for first row to be blurred. After this // Compute enough CumulativeSum for first row to be blurred. After this
// one row of CumulativeSum is updated at a time. // one row of CumulativeSum is updated at a time.
@ -2536,6 +2715,11 @@ int ARGBShade(const uint8_t* src_argb,
ARGBShadeRow = ARGBShadeRow_MSA; ARGBShadeRow = ARGBShadeRow_MSA;
} }
#endif #endif
#if defined(HAS_ARGBSHADEROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 2)) {
ARGBShadeRow = ARGBShadeRow_MMI;
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBShadeRow(src_argb, dst_argb, width, value); ARGBShadeRow(src_argb, dst_argb, width, value);
@ -2607,6 +2791,14 @@ int InterpolatePlane(const uint8_t* src0,
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
InterpolateRow(dst, src0, src1 - src0, width, interpolation); InterpolateRow(dst, src0, src1 - src0, width, interpolation);
@ -2730,6 +2922,14 @@ int ARGBShuffle(const uint8_t* src_bgra,
} }
} }
#endif #endif
#if defined(HAS_ARGBSHUFFLEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBShuffleRow = ARGBShuffleRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBShuffleRow = ARGBShuffleRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
@ -2801,6 +3001,14 @@ static int ARGBSobelize(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
#if defined(HAS_SOBELYROW_SSE2) #if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
@ -2817,6 +3025,11 @@ static int ARGBSobelize(const uint8_t* src_argb,
SobelYRow = SobelYRow_MSA; SobelYRow = SobelYRow_MSA;
} }
#endif #endif
#if defined(HAS_SOBELYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SobelYRow = SobelYRow_MMI;
}
#endif
#if defined(HAS_SOBELXROW_SSE2) #if defined(HAS_SOBELXROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelXRow = SobelXRow_SSE2; SobelXRow = SobelXRow_SSE2;
@ -2831,6 +3044,11 @@ static int ARGBSobelize(const uint8_t* src_argb,
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
SobelXRow = SobelXRow_MSA; SobelXRow = SobelXRow_MSA;
} }
#endif
#if defined(HAS_SOBELXROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SobelXRow = SobelXRow_MMI;
}
#endif #endif
{ {
// 3 rows with edges before/after. // 3 rows with edges before/after.
@ -2913,6 +3131,14 @@ int ARGBSobel(const uint8_t* src_argb,
SobelRow = SobelRow_MSA; SobelRow = SobelRow_MSA;
} }
} }
#endif
#if defined(HAS_SOBELROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SobelRow = SobelRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
SobelRow = SobelRow_MMI;
}
}
#endif #endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelRow); width, height, SobelRow);
@ -2951,6 +3177,14 @@ int ARGBSobelToPlane(const uint8_t* src_argb,
SobelToPlaneRow = SobelToPlaneRow_MSA; SobelToPlaneRow = SobelToPlaneRow_MSA;
} }
} }
#endif
#if defined(HAS_SOBELTOPLANEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SobelToPlaneRow = SobelToPlaneRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
SobelToPlaneRow = SobelToPlaneRow_MMI;
}
}
#endif #endif
return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width, return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
height, SobelToPlaneRow); height, SobelToPlaneRow);
@ -2990,6 +3224,14 @@ int ARGBSobelXY(const uint8_t* src_argb,
SobelXYRow = SobelXYRow_MSA; SobelXYRow = SobelXYRow_MSA;
} }
} }
#endif
#if defined(HAS_SOBELXYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SobelXYRow = SobelXYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
SobelXYRow = SobelXYRow_MMI;
}
}
#endif #endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelXYRow); width, height, SobelXYRow);
@ -3228,6 +3470,14 @@ int ARGBCopyAlpha(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBCOPYALPHAROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBCopyAlphaRow = ARGBCopyAlphaRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBCopyAlphaRow(src_argb, dst_argb, width); ARGBCopyAlphaRow(src_argb, dst_argb, width);
@ -3286,6 +3536,12 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
: ARGBExtractAlphaRow_Any_MSA; : ARGBExtractAlphaRow_Any_MSA;
} }
#endif #endif
#if defined(HAS_ARGBEXTRACTALPHAROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_MMI
: ARGBExtractAlphaRow_Any_MMI;
}
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
ARGBExtractAlphaRow(src_argb, dst_a, width); ARGBExtractAlphaRow(src_argb, dst_a, width);
@ -3337,6 +3593,14 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBCopyYToAlphaRow(src_y, dst_argb, width); ARGBCopyYToAlphaRow(src_y, dst_argb, width);
@ -3406,6 +3670,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
} }
} }
#endif #endif
#if defined(HAS_SPLITUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SplitUVRow = SplitUVRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
SplitUVRow = SplitUVRow_MMI;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
@ -3438,6 +3710,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
{ {
int awidth = halfwidth * 2; int awidth = halfwidth * 2;
@ -3522,6 +3802,14 @@ int UYVYToNV12(const uint8_t* src_uyvy,
} }
} }
#endif #endif
#if defined(HAS_SPLITUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SplitUVRow = SplitUVRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
SplitUVRow = SplitUVRow_MMI;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
@ -3554,6 +3842,14 @@ int UYVYToNV12(const uint8_t* src_uyvy,
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
{ {
int awidth = halfwidth * 2; int awidth = halfwidth * 2;

31
source/rotate.cc Normal file → Executable file
View File

@ -49,6 +49,11 @@ void TransposePlane(const uint8_t* src,
} }
} }
#endif #endif
#if defined(HAS_TRANSPOSEWX8_MMI)
// The MMI kernel is installed for every width; there is no IS_ALIGNED check
// and no _Any_ fallback in this dispatch.
// NOTE(review): rotate_any.cc also generates TransposeWx8_Any_MMI with a
// remainder mask of 7, which is never selected here. Confirm that
// TransposeWx8_MMI copes with widths that are not a multiple of 8.
if (TestCpuFlag(kCpuHasMMI)) {
TransposeWx8 = TransposeWx8_MMI;
}
#endif
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
TransposeWx8 = TransposeWx8_Fast_Any_SSSE3; TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
@ -166,6 +171,14 @@ void RotatePlane180(const uint8_t* src,
} }
} }
#endif #endif
#if defined(HAS_MIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MirrorRow = MirrorRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
MirrorRow = MirrorRow_MMI;
}
}
#endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
@ -186,6 +199,11 @@ void RotatePlane180(const uint8_t* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
} }
#endif #endif
#if defined(HAS_COPYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
CopyRow = IS_ALIGNED(width, 8) ? CopyRow_MMI : CopyRow_Any_MMI;
}
#endif
// Odd height will harmlessly mirror the middle row twice. // Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) { for (y = 0; y < half_height; ++y) {
@ -232,6 +250,14 @@ void TransposeUV(const uint8_t* src,
} }
} }
#endif #endif
#if defined(HAS_TRANSPOSEUVWX8_MMI)
// Default to the _Any_ wrapper and call the MMI kernel directly only when
// width is a multiple of 4.
// NOTE(review): the wrapper generated in rotate_any.cc uses a remainder mask
// of 7, i.e. 8-column granularity, while this check uses 4. Confirm the
// kernel accepts widths such as 12 that are 4-aligned but not 8-aligned.
if (TestCpuFlag(kCpuHasMMI)) {
TransposeUVWx8 = TransposeUVWx8_Any_MMI;
if (IS_ALIGNED(width, 4)) {
TransposeUVWx8 = TransposeUVWx8_MMI;
}
}
#endif
#if defined(HAS_TRANSPOSEUVWX16_MSA) #if defined(HAS_TRANSPOSEUVWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
TransposeUVWx16 = TransposeUVWx16_Any_MSA; TransposeUVWx16 = TransposeUVWx16_Any_MSA;
@ -331,6 +357,11 @@ void RotateUV180(const uint8_t* src,
MirrorUVRow = MirrorUVRow_MSA; MirrorUVRow = MirrorUVRow_MSA;
} }
#endif #endif
#if defined(HAS_MIRRORUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 8)) {
MirrorUVRow = MirrorUVRow_MMI;
}
#endif
dst_a += dst_stride_a * (height - 1); dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1); dst_b += dst_stride_b * (height - 1);

6
source/rotate_any.cc Normal file → Executable file
View File

@ -35,6 +35,9 @@ TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
#ifdef HAS_TRANSPOSEWX8_SSSE3 #ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif #endif
#ifdef HAS_TRANSPOSEWX8_MMI
TANY(TransposeWx8_Any_MMI, TransposeWx8_MMI, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif #endif
@ -62,6 +65,9 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#ifdef HAS_TRANSPOSEUVWX8_SSE2 #ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif #endif
#ifdef HAS_TRANSPOSEUVWX8_MMI
TUVANY(TransposeUVWx8_Any_MMI, TransposeUVWx8_MMI, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_MSA #ifdef HAS_TRANSPOSEUVWX16_MSA
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
#endif #endif

16
source/rotate_argb.cc Normal file → Executable file
View File

@ -56,6 +56,14 @@ static void ARGBTranspose(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MMI;
if (IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_MMI;
}
}
#endif
for (i = 0; i < width; ++i) { // column of source to row of dest. for (i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height); ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
@ -142,6 +150,14 @@ void ARGBRotate180(const uint8_t* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBMIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBMirrorRow = ARGBMirrorRow_MMI;
}
}
#endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;

290
source/rotate_mmi.cc Normal file
View File

@ -0,0 +1,290 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
// Transposes 8 source rows of |width| bytes into |width| destination rows of
// 8 bytes each, using Loongson MMI unpack instructions.
//
// Each loop iteration handles one 8x8 byte tile:
//   - loads 8 bytes from each of 8 consecutive source rows
//     (src, src + src_stride, ..., src + 7 * src_stride),
//   - interleaves them byte -> halfword -> word so that every result register
//     holds one source column,
//   - stores the 8 columns as 8 consecutive destination rows
//     (dst, dst + dst_stride, ...), then advances src by 8 bytes and dst by
//     8 * dst_stride.
//
// src        : top-left byte of the source tile row.
// src_stride : byte distance between source rows.
// dst        : first destination row.
// dst_stride : byte distance between destination rows.
// width      : number of source columns to process. Must be a positive
//              multiple of 8: the loop body always runs at least once and the
//              counter is decremented by 8 and only compared against exactly
//              zero, so any other value never terminates the loop correctly.
//
// Note: |src| and |width| are updated inside the asm, so they are declared as
// early-clobber read-write operands. Declaring them input-only (as before)
// lets the compiler assume the registers are unchanged, or share them with
// another input that happens to hold the same value.
void TransposeWx8_MMI(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst,
                      int dst_stride,
                      int width) {
  uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
  uint8_t* src_tmp = nullptr;  // Row cursor; written by the asm before use.

  __asm__ volatile(
      "1:                                                           \n\t"
      /* Rows 0 and 1. */
      "ldc1       %[tmp12],   0x00(%[src])                          \n\t"
      "dadd       %[src_tmp], %[src],         %[src_stride]         \n\t"
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp0 = (00 10 01 11 02 12 03 13) */
      "punpcklbh  %[tmp0],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp1 = (04 14 05 15 06 16 07 17) */
      "punpckhbh  %[tmp1],    %[tmp12],       %[tmp13]              \n\t"

      /* Rows 2 and 3. */
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      "ldc1       %[tmp12],   0x00(%[src_tmp])                      \n\t"
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp2 = (20 30 21 31 22 32 23 33) */
      "punpcklbh  %[tmp2],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp3 = (24 34 25 35 26 36 27 37) */
      "punpckhbh  %[tmp3],    %[tmp12],       %[tmp13]              \n\t"

      /* tmp4 = (00 10 20 30 01 11 21 31) */
      "punpcklhw  %[tmp4],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp5 = (02 12 22 32 03 13 23 33) */
      "punpckhhw  %[tmp5],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp6 = (04 14 24 34 05 15 25 35) */
      "punpcklhw  %[tmp6],    %[tmp1],        %[tmp3]               \n\t"
      /* tmp7 = (06 16 26 36 07 17 27 37) */
      "punpckhhw  %[tmp7],    %[tmp1],        %[tmp3]               \n\t"

      /* Rows 4 and 5. */
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      "ldc1       %[tmp12],   0x00(%[src_tmp])                      \n\t"
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp0 = (40 50 41 51 42 52 43 53) */
      "punpcklbh  %[tmp0],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp1 = (44 54 45 55 46 56 47 57) */
      "punpckhbh  %[tmp1],    %[tmp12],       %[tmp13]              \n\t"

      /* Rows 6 and 7. */
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      "ldc1       %[tmp12],   0x00(%[src_tmp])                      \n\t"
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp2 = (60 70 61 71 62 72 63 73) */
      "punpcklbh  %[tmp2],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp3 = (64 74 65 75 66 76 67 77) */
      "punpckhbh  %[tmp3],    %[tmp12],       %[tmp13]              \n\t"

      /* tmp8 = (40 50 60 70 41 51 61 71) */
      "punpcklhw  %[tmp8],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp9 = (42 52 62 72 43 53 63 73) */
      "punpckhhw  %[tmp9],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp10 = (44 54 64 74 45 55 65 75) */
      "punpcklhw  %[tmp10],   %[tmp1],        %[tmp3]               \n\t"
      /* tmp11 = (46 56 66 76 47 57 67 77) */
      "punpckhhw  %[tmp11],   %[tmp1],        %[tmp3]               \n\t"

      /* tmp0 = (00 10 20 30 40 50 60 70) */
      "punpcklwd  %[tmp0],    %[tmp4],        %[tmp8]               \n\t"
      /* tmp1 = (01 11 21 31 41 51 61 71) */
      "punpckhwd  %[tmp1],    %[tmp4],        %[tmp8]               \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst])                          \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst])                          \n\t"

      /* tmp0 = (02 12 22 32 42 52 62 72) */
      "punpcklwd  %[tmp0],    %[tmp5],        %[tmp9]               \n\t"
      /* tmp1 = (03 13 23 33 43 53 63 73) */
      "punpckhwd  %[tmp1],    %[tmp5],        %[tmp9]               \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst])                          \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst])                          \n\t"

      /* tmp0 = (04 14 24 34 44 54 64 74) */
      "punpcklwd  %[tmp0],    %[tmp6],        %[tmp10]              \n\t"
      /* tmp1 = (05 15 25 35 45 55 65 75) */
      "punpckhwd  %[tmp1],    %[tmp6],        %[tmp10]              \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst])                          \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst])                          \n\t"

      /* tmp0 = (06 16 26 36 46 56 66 76) */
      "punpcklwd  %[tmp0],    %[tmp7],        %[tmp11]              \n\t"
      /* tmp1 = (07 17 27 37 47 57 67 77) */
      "punpckhwd  %[tmp1],    %[tmp7],        %[tmp11]              \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst])                          \n\t"
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst])                          \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst])                          \n\t"

      /* Step to the next tile: 8 dst rows down, 8 src columns right. */
      "dadd       %[dst],     %[dst],         %[dst_stride]         \n\t"
      "daddi      %[src],     %[src],          0x08                 \n\t"
      "daddi      %[width],   %[width],       -0x08                 \n\t"
      "bnez       %[width],   1b                                    \n\t"

      : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
        [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
        [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
        [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
        [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst),
        [src_tmp] "+&r"(src_tmp), [src] "+&r"(src), [width] "+&r"(width)
      : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
      : "memory");
}
// Transposes 8 source rows of interleaved UV byte pairs and de-interleaves
// them into two planes, using Loongson MMI unpack instructions.
//
// Each loop iteration handles 4 UV pairs (8 bytes) from each of 8 consecutive
// source rows (src, src + src_stride, ..., src + 7 * src_stride):
//   - byte/halfword/word interleaves gather, for every source column k, the
//     eight U bytes (u0k..u7k) and the eight V bytes (v0k..v7k),
//   - the U bytes are stored as one 8-byte row of dst_a and the V bytes as
//     one 8-byte row of dst_b, 4 rows per plane per iteration,
//   - src then advances by 8 bytes, dst_a by 4 * dst_stride_a and dst_b by
//     4 * dst_stride_b.
//
// src          : first UV pair of the top source row.
// src_stride   : byte distance between source rows.
// dst_a        : first destination row of the U plane.
// dst_stride_a : byte distance between U destination rows.
// dst_b        : first destination row of the V plane.
// dst_stride_b : byte distance between V destination rows.
// width        : number of UV pairs per source row to process. Must be a
//                positive multiple of 4: the loop body always runs at least
//                once and the counter is decremented by 4 and only compared
//                against exactly zero.
//
// Note: |src| and |width| are updated inside the asm, so they are declared as
// early-clobber read-write operands. Declaring them input-only (as before)
// lets the compiler assume the registers are unchanged, or share them with
// another input that happens to hold the same value.
void TransposeUVWx8_MMI(const uint8_t* src,
                        int src_stride,
                        uint8_t* dst_a,
                        int dst_stride_a,
                        uint8_t* dst_b,
                        int dst_stride_b,
                        int width) {
  uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
  uint8_t* src_tmp = nullptr;  // Row cursor; written by the asm before use.

  __asm__ volatile(
      "1:                                                           \n\t"
      /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */
      "ldc1       %[tmp12],   0x00(%[src])                          \n\t"
      "dadd       %[src_tmp], %[src],         %[src_stride]         \n\t"
      /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */
      "punpcklbh  %[tmp0],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */
      "punpckhbh  %[tmp1],    %[tmp12],       %[tmp13]              \n\t"

      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */
      "ldc1       %[tmp12],   0x00(%[src_tmp])                      \n\t"
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */
      "punpcklbh  %[tmp2],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */
      "punpckhbh  %[tmp3],    %[tmp12],       %[tmp13]              \n\t"

      /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */
      "punpcklhw  %[tmp4],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */
      "punpckhhw  %[tmp5],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */
      "punpcklhw  %[tmp6],    %[tmp1],        %[tmp3]               \n\t"
      /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */
      "punpckhhw  %[tmp7],    %[tmp1],        %[tmp3]               \n\t"

      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */
      "ldc1       %[tmp12],   0x00(%[src_tmp])                      \n\t"
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */
      "punpcklbh  %[tmp0],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */
      "punpckhbh  %[tmp1],    %[tmp12],       %[tmp13]              \n\t"

      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */
      "ldc1       %[tmp12],   0x00(%[src_tmp])                      \n\t"
      "dadd       %[src_tmp], %[src_tmp],     %[src_stride]         \n\t"
      /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */
      "ldc1       %[tmp13],   0x00(%[src_tmp])                      \n\t"

      /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */
      "punpcklbh  %[tmp2],    %[tmp12],       %[tmp13]              \n\t"
      /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */
      "punpckhbh  %[tmp3],    %[tmp12],       %[tmp13]              \n\t"

      /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */
      "punpcklhw  %[tmp8],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */
      "punpckhhw  %[tmp9],    %[tmp0],        %[tmp2]               \n\t"
      /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */
      "punpcklhw  %[tmp10],   %[tmp1],        %[tmp3]               \n\t"
      /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */
      "punpckhhw  %[tmp11],   %[tmp1],        %[tmp3]               \n\t"

      /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */
      "punpcklwd  %[tmp0],    %[tmp4],        %[tmp8]               \n\t"
      /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */
      "punpckhwd  %[tmp1],    %[tmp4],        %[tmp8]               \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst_a])                        \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst_a])                        \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst_b])                        \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst_b])                        \n\t"

      /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */
      "punpcklwd  %[tmp0],    %[tmp5],        %[tmp9]               \n\t"
      /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */
      "punpckhwd  %[tmp1],    %[tmp5],        %[tmp9]               \n\t"
      "dadd       %[dst_a],   %[dst_a],       %[dst_stride_a]       \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst_a])                        \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst_a])                        \n\t"
      "dadd       %[dst_b],   %[dst_b],       %[dst_stride_b]       \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst_b])                        \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst_b])                        \n\t"

      /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */
      "punpcklwd  %[tmp0],    %[tmp6],        %[tmp10]              \n\t"
      /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */
      "punpckhwd  %[tmp1],    %[tmp6],        %[tmp10]              \n\t"
      "dadd       %[dst_a],   %[dst_a],       %[dst_stride_a]       \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst_a])                        \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst_a])                        \n\t"
      "dadd       %[dst_b],   %[dst_b],       %[dst_stride_b]       \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst_b])                        \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst_b])                        \n\t"

      /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */
      "punpcklwd  %[tmp0],    %[tmp7],        %[tmp11]              \n\t"
      /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */
      "punpckhwd  %[tmp1],    %[tmp7],        %[tmp11]              \n\t"
      "dadd       %[dst_a],   %[dst_a],       %[dst_stride_a]       \n\t"
      "gssdlc1    %[tmp0],    0x07(%[dst_a])                        \n\t"
      "gssdrc1    %[tmp0],    0x00(%[dst_a])                        \n\t"
      "dadd       %[dst_b],   %[dst_b],       %[dst_stride_b]       \n\t"
      "gssdlc1    %[tmp1],    0x07(%[dst_b])                        \n\t"
      "gssdrc1    %[tmp1],    0x00(%[dst_b])                        \n\t"

      /* Step to the next tile: 4 rows down in each plane, 4 UV pairs right. */
      "dadd       %[dst_a],   %[dst_a],       %[dst_stride_a]       \n\t"
      "dadd       %[dst_b],   %[dst_b],       %[dst_stride_b]       \n\t"
      "daddiu     %[src],     %[src],          0x08                 \n\t"
      "daddi      %[width],   %[width],       -0x04                 \n\t"
      "bnez       %[width],   1b                                    \n\t"

      : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
        [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
        [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
        [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
        [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a),
        [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp), [src] "+&r"(src),
        [width] "+&r"(width)
      : [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b),
        [src_stride] "r"(src_stride)
      : "memory");
}
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

176
source/row_any.cc Normal file → Executable file
View File

@ -92,6 +92,9 @@ ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#ifdef HAS_MERGERGBROW_NEON #ifdef HAS_MERGERGBROW_NEON
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15) ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif #endif
#ifdef HAS_MERGERGBROW_MMI
ANY31(MergeRGBRow_Any_MMI, MergeRGBRow_MMI, 0, 0, 3, 7)
#endif
#ifdef HAS_I422TOYUY2ROW_SSE2 #ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
@ -106,18 +109,27 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOYUY2ROW_MSA #ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31) ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif #endif
#ifdef HAS_I422TOYUY2ROW_MMI
ANY31(I422ToYUY2Row_Any_MMI, I422ToYUY2Row_MMI, 1, 1, 4, 7)
#endif
#ifdef HAS_I422TOUYVYROW_NEON #ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif #endif
#ifdef HAS_I422TOUYVYROW_MSA #ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31) ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif #endif
#ifdef HAS_I422TOUYVYROW_MMI
ANY31(I422ToUYVYRow_Any_MMI, I422ToUYVYRow_MMI, 1, 1, 4, 7)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2 #ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif #endif
#ifdef HAS_BLENDPLANEROW_SSSE3 #ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif #endif
#ifdef HAS_BLENDPLANEROW_MMI
ANY31(BlendPlaneRow_Any_MMI, BlendPlaneRow_MMI, 0, 0, 1, 7)
#endif
#undef ANY31 #undef ANY31
// Note that odd width replication includes 444 due to implementation // Note that odd width replication includes 444 due to implementation
@ -271,6 +283,9 @@ ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#ifdef HAS_MERGEUVROW_MSA #ifdef HAS_MERGEUVROW_MSA
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15) ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#endif #endif
#ifdef HAS_MERGEUVROW_MMI
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif
// Math functions. // Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_SSE2
@ -303,12 +318,21 @@ ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#ifdef HAS_ARGBMULTIPLYROW_MSA #ifdef HAS_ARGBMULTIPLYROW_MSA
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif #endif
#ifdef HAS_ARGBMULTIPLYROW_MMI
ANY21(ARGBMultiplyRow_Any_MMI, ARGBMultiplyRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_ARGBADDROW_MSA #ifdef HAS_ARGBADDROW_MSA
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBADDROW_MMI
ANY21(ARGBAddRow_Any_MMI, ARGBAddRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MSA #ifdef HAS_ARGBSUBTRACTROW_MSA
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBSUBTRACTROW_MMI
ANY21(ARGBSubtractRow_Any_MMI, ARGBSubtractRow_MMI, 0, 4, 4, 4, 1)
#endif
#ifdef HAS_SOBELROW_SSE2 #ifdef HAS_SOBELROW_SSE2
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif #endif
@ -318,6 +342,9 @@ ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#ifdef HAS_SOBELROW_MSA #ifdef HAS_SOBELROW_MSA
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15) ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif #endif
#ifdef HAS_SOBELROW_MMI
ANY21(SobelRow_Any_MMI, SobelRow_MMI, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2 #ifdef HAS_SOBELTOPLANEROW_SSE2
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15) ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif #endif
@ -327,6 +354,9 @@ ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#ifdef HAS_SOBELTOPLANEROW_MSA #ifdef HAS_SOBELTOPLANEROW_MSA
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31) ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif #endif
#ifdef HAS_SOBELTOPLANEROW_MMI
ANY21(SobelToPlaneRow_Any_MMI, SobelToPlaneRow_MMI, 0, 1, 1, 1, 7)
#endif
#ifdef HAS_SOBELXYROW_SSE2 #ifdef HAS_SOBELXYROW_SSE2
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif #endif
@ -336,6 +366,9 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#ifdef HAS_SOBELXYROW_MSA #ifdef HAS_SOBELXYROW_MSA
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15) ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif #endif
#ifdef HAS_SOBELXYROW_MMI
ANY21(SobelXYRow_Any_MMI, SobelXYRow_MMI, 0, 1, 1, 4, 7)
#endif
#undef ANY21 #undef ANY21
// Any 2 planes to 1 with yuvconstants // Any 2 planes to 1 with yuvconstants
@ -521,12 +554,24 @@ ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15) ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15)
#endif #endif
#if defined(HAS_ARGBTORGB24ROW_MMI)
ANY11(ARGBToRGB24Row_Any_MMI, ARGBToRGB24Row_MMI, 0, 4, 3, 3)
ANY11(ARGBToRAWRow_Any_MMI, ARGBToRAWRow_MMI, 0, 4, 3, 3)
ANY11(ARGBToRGB565Row_Any_MMI, ARGBToRGB565Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_MMI, ARGBToARGB1555Row_MMI, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_MMI, ARGBToARGB4444Row_MMI, 0, 4, 2, 3)
ANY11(J400ToARGBRow_Any_MMI, J400ToARGBRow_MMI, 0, 1, 4, 3)
ANY11(I400ToARGBRow_Any_MMI, I400ToARGBRow_MMI, 0, 1, 4, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_NEON) #if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif #endif
#if defined(HAS_RAWTORGB24ROW_MSA) #if defined(HAS_RAWTORGB24ROW_MSA)
ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15) ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
#endif #endif
#if defined(HAS_RAWTORGB24ROW_MMI)
ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3)
#endif
#ifdef HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif #endif
@ -558,57 +603,87 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
#ifdef HAS_ARGBTOYROW_MSA #ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif #endif
#ifdef HAS_ARGBTOYROW_MMI
ANY11(ARGBToYRow_Any_MMI, ARGBToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_NEON #ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
#endif #endif
#ifdef HAS_ARGBTOYJROW_MSA #ifdef HAS_ARGBTOYJROW_MSA
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#endif #endif
#ifdef HAS_ARGBTOYJROW_MMI
ANY11(ARGBToYJRow_Any_MMI, ARGBToYJRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_NEON #ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7) ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
#endif #endif
#ifdef HAS_BGRATOYROW_MSA #ifdef HAS_BGRATOYROW_MSA
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15) ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#endif #endif
#ifdef HAS_BGRATOYROW_MMI
ANY11(BGRAToYRow_Any_MMI, BGRAToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_NEON #ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7) ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
#endif #endif
#ifdef HAS_ABGRTOYROW_MSA #ifdef HAS_ABGRTOYROW_MSA
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#endif #endif
#ifdef HAS_ABGRTOYROW_MMI
ANY11(ABGRToYRow_Any_MMI, ABGRToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_NEON #ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7) ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#endif #endif
#ifdef HAS_RGBATOYROW_MSA #ifdef HAS_RGBATOYROW_MSA
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#endif #endif
#ifdef HAS_RGBATOYROW_MMI
ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_NEON #ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif #endif
#ifdef HAS_RGB24TOYROW_MSA #ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif #endif
#ifdef HAS_RGB24TOYROW_MMI
ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_NEON #ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif #endif
#ifdef HAS_RAWTOYROW_MSA #ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif #endif
#ifdef HAS_RAWTOYROW_MMI
ANY11(RAWToYRow_Any_MMI, RAWToYRow_MMI, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_NEON #ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif #endif
#ifdef HAS_RGB565TOYROW_MSA #ifdef HAS_RGB565TOYROW_MSA
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15) ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif #endif
#ifdef HAS_RGB565TOYROW_MMI
ANY11(RGB565ToYRow_Any_MMI, RGB565ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON #ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7) ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif #endif
#ifdef HAS_ARGB1555TOYROW_MSA #ifdef HAS_ARGB1555TOYROW_MSA
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15) ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif #endif
#ifdef HAS_ARGB1555TOYROW_MMI
ANY11(ARGB1555ToYRow_Any_MMI, ARGB1555ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON #ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif #endif
#ifdef HAS_ARGB4444TOYROW_MMI
ANY11(ARGB4444ToYRow_Any_MMI, ARGB4444ToYRow_MMI, 0, 2, 1, 7)
#endif
#ifdef HAS_YUY2TOYROW_NEON #ifdef HAS_YUY2TOYROW_NEON
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif #endif
@ -618,39 +693,60 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#ifdef HAS_YUY2TOYROW_MSA #ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif #endif
#ifdef HAS_YUY2TOYROW_MMI
ANY11(YUY2ToYRow_Any_MMI, YUY2ToYRow_MMI, 1, 4, 1, 7)
#endif
#ifdef HAS_UYVYTOYROW_MSA #ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif #endif
#ifdef HAS_UYVYTOYROW_MMI
ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON #ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif #endif
#ifdef HAS_RGB24TOARGBROW_MSA #ifdef HAS_RGB24TOARGBROW_MSA
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15) ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif #endif
#ifdef HAS_RGB24TOARGBROW_MMI
ANY11(RGB24ToARGBRow_Any_MMI, RGB24ToARGBRow_MMI, 0, 3, 4, 3)
#endif
#ifdef HAS_RAWTOARGBROW_NEON #ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif #endif
#ifdef HAS_RAWTOARGBROW_MSA #ifdef HAS_RAWTOARGBROW_MSA
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif #endif
#ifdef HAS_RAWTOARGBROW_MMI
ANY11(RAWToARGBRow_Any_MMI, RAWToARGBRow_MMI, 0, 3, 4, 3)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON #ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7) ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif #endif
#ifdef HAS_RGB565TOARGBROW_MSA #ifdef HAS_RGB565TOARGBROW_MSA
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15) ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif #endif
#ifdef HAS_RGB565TOARGBROW_MMI
ANY11(RGB565ToARGBRow_Any_MMI, RGB565ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON #ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif #endif
#ifdef HAS_ARGB1555TOARGBROW_MSA #ifdef HAS_ARGB1555TOARGBROW_MSA
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif #endif
#ifdef HAS_ARGB1555TOARGBROW_MMI
ANY11(ARGB1555ToARGBRow_Any_MMI, ARGB1555ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON #ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif #endif
#ifdef HAS_ARGB4444TOARGBROW_MSA #ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif #endif
#ifdef HAS_ARGB4444TOARGBROW_MMI
ANY11(ARGB4444ToARGBRow_Any_MMI, ARGB4444ToARGBRow_MMI, 0, 2, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_SSSE3
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif #endif
@ -669,6 +765,9 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#ifdef HAS_ARGBATTENUATEROW_MSA #ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBATTENUATEROW_MMI
ANY11(ARGBAttenuateRow_Any_MMI, ARGBAttenuateRow_MMI, 0, 4, 4, 1)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif #endif
@ -681,6 +780,9 @@ ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#ifdef HAS_ARGBEXTRACTALPHAROW_MSA #ifdef HAS_ARGBEXTRACTALPHAROW_MSA
ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
#endif #endif
#ifdef HAS_ARGBEXTRACTALPHAROW_MMI
ANY11(ARGBExtractAlphaRow_Any_MMI, ARGBExtractAlphaRow_MMI, 0, 4, 1, 7)
#endif
#undef ANY11 #undef ANY11
// Any 1 to 1 blended. Destination is read, modify, write. // Any 1 to 1 blended. Destination is read, modify, write.
@ -705,12 +807,18 @@ ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#ifdef HAS_ARGBCOPYALPHAROW_SSE2 #ifdef HAS_ARGBCOPYALPHAROW_SSE2
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7) ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBCOPYALPHAROW_MMI
ANY11B(ARGBCopyAlphaRow_Any_MMI, ARGBCopyAlphaRow_MMI, 0, 4, 4, 1)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15) ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif #endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif #endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_MMI
ANY11B(ARGBCopyYToAlphaRow_Any_MMI, ARGBCopyYToAlphaRow_MMI, 0, 1, 4, 7)
#endif
#undef ANY11B #undef ANY11B
// Any 1 to 1 with parameter. // Any 1 to 1 with parameter.
@ -760,6 +868,14 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA,
2, 2,
7) 7)
#endif #endif
#if defined(HAS_ARGBTORGB565DITHERROW_MMI)
ANY11P(ARGBToRGB565DitherRow_Any_MMI,
ARGBToRGB565DitherRow_MMI,
const uint32_t,
4,
2,
3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3 #ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif #endif
@ -772,6 +888,10 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#ifdef HAS_ARGBSHUFFLEROW_MSA #ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBSHUFFLEROW_MMI
ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1)
#endif
#undef ANY11P
#undef ANY11P #undef ANY11P
// Any 1 to 1 with parameter and shorts. BPP measures in shorts. // Any 1 to 1 with parameter and shorts. BPP measures in shorts.
@ -940,6 +1060,9 @@ ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#ifdef HAS_INTERPOLATEROW_MSA #ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif #endif
#ifdef HAS_INTERPOLATEROW_MMI
ANY11T(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
#endif
#undef ANY11T #undef ANY11T
// Any 1 to 1 mirror. // Any 1 to 1 mirror.
@ -969,6 +1092,9 @@ ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
#ifdef HAS_MIRRORROW_MSA #ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif #endif
// Loongson MMI instantiation of the MirrorRow "Any" wrapper.
#if defined(HAS_MIRRORROW_MMI)
ANY11M(MirrorRow_Any_MMI,
       MirrorRow_MMI,
       1,
       7)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif #endif
@ -981,6 +1107,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
#ifdef HAS_ARGBMIRRORROW_MSA #ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif #endif
// Loongson MMI instantiation of the ARGBMirrorRow "Any" wrapper.
#if defined(HAS_ARGBMIRRORROW_MMI)
ANY11M(ARGBMirrorRow_Any_MMI,
       ARGBMirrorRow_MMI,
       4,
       1)
#endif
#undef ANY11M #undef ANY11M
// Any 1 plane. (memset) // Any 1 plane. (memset)
@ -1039,6 +1168,9 @@ ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#ifdef HAS_SPLITUVROW_MSA #ifdef HAS_SPLITUVROW_MSA
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif #endif
// Loongson MMI instantiation of the SplitUVRow "Any" wrapper.
#if defined(HAS_SPLITUVROW_MMI)
ANY12(SplitUVRow_Any_MMI,
      SplitUVRow_MMI,
      0,
      2,
      0,
      7)
#endif
#ifdef HAS_ARGBTOUV444ROW_SSSE3 #ifdef HAS_ARGBTOUV444ROW_SSSE3
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif #endif
@ -1060,6 +1192,11 @@ ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif #endif
// Loongson MMI instantiations of three one-row "Any" wrappers.
// NOTE(review): all three are gated on HAS_YUY2TOUV422ROW_MMI alone, so
// ARGBToUV444Row_Any_MMI and UYVYToUV422Row_Any_MMI have no guard of their
// own. Confirm the three *_MMI kernels are always enabled together, or split
// this into per-function guards.
#ifdef HAS_YUY2TOUV422ROW_MMI
ANY12(ARGBToUV444Row_Any_MMI, ARGBToUV444Row_MMI, 0, 4, 0, 7)
ANY12(UYVYToUV422Row_Any_MMI, UYVYToUV422Row_MMI, 1, 4, 1, 15)
ANY12(YUY2ToUV422Row_Any_MMI, YUY2ToUV422Row_MMI, 1, 4, 1, 15)
#endif
#undef ANY12 #undef ANY12
// Any 1 to 3. Outputs RGB planes. // Any 1 to 3. Outputs RGB planes.
@ -1086,6 +1223,9 @@ ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
#ifdef HAS_SPLITRGBROW_NEON #ifdef HAS_SPLITRGBROW_NEON
ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15) ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
#endif #endif
// Loongson MMI instantiation of the SplitRGBRow "Any" wrapper.
#if defined(HAS_SPLITRGBROW_MMI)
ANY13(SplitRGBRow_Any_MMI,
      SplitRGBRow_MMI,
      3,
      3)
#endif
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels. // 128 byte row allows for 32 avx ARGB pixels.
@ -1140,57 +1280,87 @@ ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#ifdef HAS_ARGBTOUVROW_MSA #ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif #endif
// Loongson MMI instantiation: ARGB, two source rows -> UV planes.
#if defined(HAS_ARGBTOUVROW_MMI)
ANY12S(ARGBToUVRow_Any_MMI,
       ARGBToUVRow_MMI,
       0,
       4,
       15)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON #ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif #endif
#ifdef HAS_ARGBTOUVJROW_MSA #ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif #endif
// Loongson MMI instantiation of the ARGBToUVJRow two-row "Any" wrapper.
#if defined(HAS_ARGBTOUVJROW_MMI)
ANY12S(ARGBToUVJRow_Any_MMI,
       ARGBToUVJRow_MMI,
       0,
       4,
       15)
#endif
#ifdef HAS_BGRATOUVROW_NEON #ifdef HAS_BGRATOUVROW_NEON
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15) ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif #endif
#ifdef HAS_BGRATOUVROW_MSA #ifdef HAS_BGRATOUVROW_MSA
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31) ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31)
#endif #endif
// Loongson MMI instantiation of the BGRAToUVRow two-row "Any" wrapper.
#if defined(HAS_BGRATOUVROW_MMI)
ANY12S(BGRAToUVRow_Any_MMI,
       BGRAToUVRow_MMI,
       0,
       4,
       15)
#endif
#ifdef HAS_ABGRTOUVROW_NEON #ifdef HAS_ABGRTOUVROW_NEON
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15) ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif #endif
#ifdef HAS_ABGRTOUVROW_MSA #ifdef HAS_ABGRTOUVROW_MSA
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31) ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31)
#endif #endif
// Loongson MMI instantiation of the ABGRToUVRow two-row "Any" wrapper.
#if defined(HAS_ABGRTOUVROW_MMI)
ANY12S(ABGRToUVRow_Any_MMI,
       ABGRToUVRow_MMI,
       0,
       4,
       15)
#endif
#ifdef HAS_RGBATOUVROW_NEON #ifdef HAS_RGBATOUVROW_NEON
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15) ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif #endif
#ifdef HAS_RGBATOUVROW_MSA #ifdef HAS_RGBATOUVROW_MSA
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31) ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31)
#endif #endif
// Loongson MMI instantiation of the RGBAToUVRow two-row "Any" wrapper.
#if defined(HAS_RGBATOUVROW_MMI)
ANY12S(RGBAToUVRow_Any_MMI,
       RGBAToUVRow_MMI,
       0,
       4,
       15)
#endif
#ifdef HAS_RGB24TOUVROW_NEON #ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif #endif
#ifdef HAS_RGB24TOUVROW_MSA #ifdef HAS_RGB24TOUVROW_MSA
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15) ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif #endif
// Loongson MMI instantiation of the RGB24ToUVRow two-row "Any" wrapper.
#if defined(HAS_RGB24TOUVROW_MMI)
ANY12S(RGB24ToUVRow_Any_MMI,
       RGB24ToUVRow_MMI,
       0,
       3,
       15)
#endif
#ifdef HAS_RAWTOUVROW_NEON #ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif #endif
#ifdef HAS_RAWTOUVROW_MSA #ifdef HAS_RAWTOUVROW_MSA
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15) ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif #endif
// Loongson MMI instantiation of the RAWToUVRow two-row "Any" wrapper.
#if defined(HAS_RAWTOUVROW_MMI)
ANY12S(RAWToUVRow_Any_MMI,
       RAWToUVRow_MMI,
       0,
       3,
       15)
#endif
#ifdef HAS_RGB565TOUVROW_NEON #ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif #endif
#ifdef HAS_RGB565TOUVROW_MSA #ifdef HAS_RGB565TOUVROW_MSA
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15) ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif #endif
// Loongson MMI instantiation of the RGB565ToUVRow two-row "Any" wrapper.
#if defined(HAS_RGB565TOUVROW_MMI)
ANY12S(RGB565ToUVRow_Any_MMI,
       RGB565ToUVRow_MMI,
       0,
       2,
       15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON #ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif #endif
#ifdef HAS_ARGB1555TOUVROW_MSA #ifdef HAS_ARGB1555TOUVROW_MSA
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15) ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif #endif
// Loongson MMI instantiation of the ARGB1555ToUVRow two-row "Any" wrapper.
#if defined(HAS_ARGB1555TOUVROW_MMI)
ANY12S(ARGB1555ToUVRow_Any_MMI,
       ARGB1555ToUVRow_MMI,
       0,
       2,
       15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON #ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15) ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif #endif
// Loongson MMI instantiation of the ARGB4444ToUVRow two-row "Any" wrapper.
#if defined(HAS_ARGB4444TOUVROW_MMI)
ANY12S(ARGB4444ToUVRow_Any_MMI,
       ARGB4444ToUVRow_MMI,
       0,
       2,
       15)
#endif
#ifdef HAS_YUY2TOUVROW_NEON #ifdef HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif #endif
@ -1200,9 +1370,15 @@ ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#ifdef HAS_YUY2TOUVROW_MSA #ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif #endif
// Loongson MMI instantiation of the YUY2ToUVRow two-row "Any" wrapper.
#if defined(HAS_YUY2TOUVROW_MMI)
ANY12S(YUY2ToUVRow_Any_MMI,
       YUY2ToUVRow_MMI,
       1,
       4,
       15)
#endif
#ifdef HAS_UYVYTOUVROW_MSA #ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif #endif
// Loongson MMI instantiation of the UYVYToUVRow two-row "Any" wrapper.
#if defined(HAS_UYVYTOUVROW_MMI)
ANY12S(UYVYToUVRow_Any_MMI,
       UYVYToUVRow_MMI,
       1,
       4,
       15)
#endif
#undef ANY12S #undef ANY12S
#ifdef __cplusplus #ifdef __cplusplus

5972
source/row_mmi.cc Normal file

File diff suppressed because it is too large Load Diff

80
source/scale.cc Normal file → Executable file
View File

@ -118,6 +118,21 @@ static void ScalePlaneDown2(int src_width,
} }
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI
: ScaleRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI
: (filtering == kFilterLinear
? ScaleRowDown2Linear_MMI
: ScaleRowDown2Box_MMI);
}
}
#endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
src_stride = 0; src_stride = 0;
@ -169,6 +184,15 @@ static void ScalePlaneDown2_16(int src_width,
: ScaleRowDown2Box_16_SSE2); : ScaleRowDown2Box_16_SSE2);
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN2_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_16_MMI
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16_MMI
: ScaleRowDown2Box_16_MMI);
}
#endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
src_stride = 0; src_stride = 0;
@ -241,6 +265,15 @@ static void ScalePlaneDown4(int src_width,
} }
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN4_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI;
}
}
#endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
src_stride = 0; src_stride = 0;
@ -284,6 +317,12 @@ static void ScalePlaneDown4_16(int src_width,
filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN4_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI;
}
#endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
src_stride = 0; src_stride = 0;
@ -849,6 +888,14 @@ static void ScalePlaneBox(int src_width,
} }
} }
#endif #endif
#if defined(HAS_SCALEADDROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleAddRow = ScaleAddRow_Any_MMI;
if (IS_ALIGNED(src_width, 8)) {
ScaleAddRow = ScaleAddRow_MMI;
}
}
#endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
int boxheight; int boxheight;
@ -904,6 +951,11 @@ static void ScalePlaneBox_16(int src_width,
} }
#endif #endif
#if defined(HAS_SCALEADDROW_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) {
ScaleAddRow = ScaleAddRow_16_MMI;
}
#endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
int boxheight; int boxheight;
int iy = y >> 16; int iy = y >> 16;
@ -988,6 +1040,14 @@ void ScalePlaneBilinearDown(int src_width,
} }
} }
#endif #endif
// Select the Loongson MMI InterpolateRow: the "Any" wrapper by default, the
// direct kernel when src_width passes the alignment test.
// NOTE(review): the alignment tested here is 16, while ScalePlaneVertical
// tests IS_ALIGNED(dst_width_bytes, 8) for the same kernel. The stricter
// check looks safe but may be unintended - confirm.
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3) #if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
@ -1206,6 +1266,11 @@ void ScalePlaneBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_SSE2; ScaleFilterCols = ScaleColsUp2_SSE2;
} }
#endif
#if defined(HAS_SCALECOLS_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_MMI;
}
#endif #endif
} }
@ -1333,6 +1398,11 @@ void ScalePlaneBilinearUp_16(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2; ScaleFilterCols = ScaleColsUp2_16_SSE2;
} }
#endif
#if defined(HAS_SCALECOLS_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_MMI;
}
#endif #endif
} }
@ -1418,6 +1488,11 @@ static void ScalePlaneSimple(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_SSE2; ScaleCols = ScaleColsUp2_SSE2;
} }
#endif
#if defined(HAS_SCALECOLS_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_MMI;
}
#endif #endif
} }
@ -1454,6 +1529,11 @@ static void ScalePlaneSimple_16(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_SSE2; ScaleCols = ScaleColsUp2_16_SSE2;
} }
#endif
#if defined(HAS_SCALECOLS_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_MMI;
}
#endif #endif
} }

68
source/scale_any.cc Normal file → Executable file
View File

@ -42,6 +42,9 @@ CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
#ifdef HAS_SCALEARGBCOLS_MSA #ifdef HAS_SCALEARGBCOLS_MSA
CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
#endif #endif
// Loongson MMI instantiation of the ScaleARGBCols "Any" wrapper, with
// ScaleARGBCols_C as the C counterpart.
#if defined(HAS_SCALEARGBCOLS_MMI)
CANY(ScaleARGBCols_Any_MMI,
     ScaleARGBCols_MMI,
     ScaleARGBCols_C,
     4,
     0)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_NEON #ifdef HAS_SCALEARGBFILTERCOLS_NEON
CANY(ScaleARGBFilterCols_Any_NEON, CANY(ScaleARGBFilterCols_Any_NEON,
ScaleARGBFilterCols_NEON, ScaleARGBFilterCols_NEON,
@ -165,6 +168,27 @@ SDANY(ScaleRowDown2Box_Any_MSA,
1, 1,
31) 31)
#endif #endif
// Loongson MMI instantiations for the 1/2 down-scalers: point, linear and box
// "Any" wrappers, plus the odd-width box variant (which pairs the box MMI
// kernel with ScaleRowDown2Box_Odd_C).
#if defined(HAS_SCALEROWDOWN2_MMI)
SDANY(ScaleRowDown2_Any_MMI,
      ScaleRowDown2_MMI,
      ScaleRowDown2_C,
      2,
      1,
      7)
SDANY(ScaleRowDown2Linear_Any_MMI, ScaleRowDown2Linear_MMI,
      ScaleRowDown2Linear_C, 2, 1, 7)
SDANY(ScaleRowDown2Box_Any_MMI, ScaleRowDown2Box_MMI,
      ScaleRowDown2Box_C, 2, 1, 7)
SDODD(ScaleRowDown2Box_Odd_MMI, ScaleRowDown2Box_MMI,
      ScaleRowDown2Box_Odd_C, 2, 1, 7)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3 #ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3, SDANY(ScaleRowDown4Box_Any_SSSE3,
@ -201,6 +225,15 @@ SDANY(ScaleRowDown4Box_Any_MSA,
1, 1,
15) 15)
#endif #endif
// Loongson MMI instantiations for the 1/4 down-scalers (point and box).
#if defined(HAS_SCALEROWDOWN4_MMI)
SDANY(ScaleRowDown4_Any_MMI,
      ScaleRowDown4_MMI,
      ScaleRowDown4_C,
      4,
      1,
      7)
SDANY(ScaleRowDown4Box_Any_MMI, ScaleRowDown4Box_MMI,
      ScaleRowDown4Box_C, 4, 1, 7)
#endif
#ifdef HAS_SCALEROWDOWN34_SSSE3 #ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3, SDANY(ScaleRowDown34_Any_SSSE3,
ScaleRowDown34_SSSE3, ScaleRowDown34_SSSE3,
@ -382,6 +415,26 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA,
4, 4,
3) 3)
#endif #endif
// Loongson MMI instantiations for the ARGB 1/2 down-scalers: point, linear
// and box "Any" wrappers.
#if defined(HAS_SCALEARGBROWDOWN2_MMI)
SDANY(ScaleARGBRowDown2_Any_MMI, ScaleARGBRowDown2_MMI,
      ScaleARGBRowDown2_C, 2, 4, 1)
SDANY(ScaleARGBRowDown2Linear_Any_MMI, ScaleARGBRowDown2Linear_MMI,
      ScaleARGBRowDown2Linear_C, 2, 4, 1)
SDANY(ScaleARGBRowDown2Box_Any_MMI, ScaleARGBRowDown2Box_MMI,
      ScaleARGBRowDown2Box_C, 2, 4, 1)
#endif
#undef SDANY #undef SDANY
// Scale down by even scale factor. // Scale down by even scale factor.
@ -433,6 +486,18 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
4, 4,
3) 3)
#endif #endif
// Loongson MMI instantiations for the ARGB even-factor down-scalers (point
// and box).
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
SDAANY(ScaleARGBRowDownEven_Any_MMI, ScaleARGBRowDownEven_MMI,
       ScaleARGBRowDownEven_C, 4, 1)
SDAANY(ScaleARGBRowDownEvenBox_Any_MMI, ScaleARGBRowDownEvenBox_MMI,
       ScaleARGBRowDownEvenBox_C, 4, 1)
#endif
// Add rows box filter scale down. // Add rows box filter scale down.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
@ -456,6 +521,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA #ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif #endif
// Loongson MMI instantiation of the ScaleAddRow "Any" wrapper, with
// ScaleAddRow_C as the C counterpart.
#if defined(HAS_SCALEADDROW_MMI)
SAANY(ScaleAddRow_Any_MMI,
      ScaleAddRow_MMI,
      ScaleAddRow_C,
      7)
#endif
#undef SAANY #undef SAANY
#ifdef __cplusplus #ifdef __cplusplus

73
source/scale_argb.cc Normal file → Executable file
View File

@ -111,6 +111,22 @@ static void ScaleARGBDown2(int src_width,
} }
} }
#endif #endif
#if defined(HAS_SCALEARGBROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI
: ScaleARGBRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 2)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_MMI
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI
: ScaleARGBRowDown2Box_MMI);
}
}
#endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
src_stride = 0; src_stride = 0;
@ -237,6 +253,16 @@ static void ScaleARGBDownEven(int src_width,
} }
} }
#endif #endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI
: ScaleARGBRowDownEven_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
ScaleARGBRowDownEven =
filtering ? ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI;
}
}
#endif
if (filtering == kFilterLinear) { if (filtering == kFilterLinear) {
src_stride = 0; src_stride = 0;
@ -417,6 +443,14 @@ static void ScaleARGBBilinearUp(int src_width,
InterpolateRow = InterpolateRow_MSA; InterpolateRow = InterpolateRow_MSA;
} }
} }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif #endif
if (src_width >= 32768) { if (src_width >= 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols =
@ -463,6 +497,14 @@ static void ScaleARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_MSA; ScaleARGBFilterCols = ScaleARGBCols_MSA;
} }
} }
#endif
// Unfiltered path: pick the Loongson MMI ScaleARGBCols when the CPU has MMI.
#if defined(HAS_SCALEARGBCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
// NOTE(review): an alignment of 1 presumably holds for every dst_width, which
// would make the _Any_ assignment above dead - confirm against IS_ALIGNED's
// definition.
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBFilterCols = ScaleARGBCols_MMI;
}
}
#endif #endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C; ScaleARGBFilterCols = ScaleARGBColsUp2_C;
@ -470,6 +512,11 @@ static void ScaleARGBBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
} }
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
}
#endif #endif
} }
@ -665,6 +712,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_MSA; ScaleARGBFilterCols = ScaleARGBCols_MSA;
} }
} }
#endif
// Unfiltered path: pick the Loongson MMI ScaleARGBCols when the CPU has MMI.
#if defined(HAS_SCALEARGBCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
// NOTE(review): an alignment of 1 presumably holds for every dst_width, which
// would make the _Any_ assignment above dead - confirm against IS_ALIGNED's
// definition.
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBFilterCols = ScaleARGBCols_MMI;
}
}
#endif #endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C; ScaleARGBFilterCols = ScaleARGBColsUp2_C;
@ -672,6 +727,11 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
} }
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
}
#endif #endif
} }
@ -796,6 +856,14 @@ static void ScaleARGBSimple(int src_width,
ScaleARGBCols = ScaleARGBCols_MSA; ScaleARGBCols = ScaleARGBCols_MSA;
} }
} }
#endif
// Pick the Loongson MMI ScaleARGBCols when the CPU has MMI.
#if defined(HAS_SCALEARGBCOLS_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBCols = ScaleARGBCols_Any_MMI;
// NOTE(review): an alignment of 1 presumably holds for every dst_width, which
// would make the _Any_ assignment above dead - confirm against IS_ALIGNED's
// definition.
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBCols = ScaleARGBCols_MMI;
}
}
#endif #endif
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C; ScaleARGBCols = ScaleARGBColsUp2_C;
@ -803,6 +871,11 @@ static void ScaleARGBSimple(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2; ScaleARGBCols = ScaleARGBColsUp2_SSE2;
} }
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBCols = ScaleARGBColsUp2_MMI;
}
#endif #endif
} }

View File

@ -1072,6 +1072,14 @@ void ScalePlaneVertical(int src_height,
InterpolateRow = InterpolateRow_MSA; InterpolateRow = InterpolateRow_MSA;
} }
} }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width_bytes, 8)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif #endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
int yi; int yi;

1128
source/scale_mmi.cc Normal file

File diff suppressed because it is too large Load Diff

View File

@ -67,6 +67,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has MIPS %d\n", has_mips); printf("Has MIPS %d\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA); int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %d\n", has_msa); printf("Has MSA %d\n", has_msa);
int has_mmi = TestCpuFlag(kCpuHasMMI);
printf("Has MMI %d\n", has_mmi);
#endif #endif
} }

11
unit_test/scale_test.cc Normal file → Executable file
View File

@ -437,6 +437,10 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst, uint16_t* dst,
int dst_width); int dst_width);
// C-linkage declaration of the Loongson MMI 16-bit ScaleRowUp2 row function,
// declared here so the test can call it directly.
extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst, int dst_width);
extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr, extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst, uint16_t* dst,
@ -463,6 +467,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
} else { } else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
} }
#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
int has_mmi = TestCpuFlag(kCpuHasMMI);
if (has_mmi) {
ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
} else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
}
#else #else
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280); ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
#endif #endif

View File

@ -71,6 +71,8 @@ int main(int argc, const char* argv[]) {
if (has_mips) { if (has_mips) {
int has_msa = TestCpuFlag(kCpuHasMSA); int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa); printf("Has MSA %x\n", has_msa);
int has_mmi = TestCpuFlag(kCpuHasMMI);
printf("Has MMI %x\n", has_mmi);
} }
if (has_x86) { if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2); int has_sse2 = TestCpuFlag(kCpuHasSSE2);