From 2b4453d46faebcad72d744d763a4e3b1e97d338d Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Thu, 16 Oct 2025 11:24:52 -0700 Subject: [PATCH] Deprecate MIPS and MSA support. - Remove *_msa.cc source files - Update build files - Update header references, planar ifdefs for row functions - Update documentation on supported platforms - Version bumped to 1921 - clang-format applied Bug: 434383432 Change-Id: I072d6aac4956f0ed668e64614ac8557612171f76 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7045953 Reviewed-by: Justin Green --- Android.bp | 4 - Android.mk | 4 - BUILD.gn | 25 +- CMakeLists.txt | 4 - DEPS | 7 - README.chromium | 2 +- docs/deprecated_builds.md | 16 - docs/environment_variables.md | 4 - docs/getting_started.md | 14 - include/libyuv/compare_row.h | 11 - include/libyuv/cpu_id.h | 6 - include/libyuv/macros_msa.h | 244 --- include/libyuv/rotate_row.h | 29 - include/libyuv/row.h | 622 ------ include/libyuv/scale_row.h | 230 -- libyuv.gyp | 12 - libyuv.gypi | 5 - linux.mk | 4 - source/compare.cc | 10 - source/compare_msa.cc | 97 - source/convert.cc | 308 +-- source/convert_argb.cc | 352 --- source/convert_from.cc | 24 - source/convert_from_argb.cc | 311 +-- source/cpu_id.cc | 37 +- source/planar_functions.cc | 268 --- source/rotate.cc | 57 +- source/rotate_any.cc | 6 - source/rotate_argb.cc | 16 - source/rotate_msa.cc | 240 --- source/row_any.cc | 198 -- source/row_msa.cc | 3597 ------------------------------- source/scale.cc | 97 - source/scale_any.cc | 116 - source/scale_argb.cc | 106 - source/scale_common.cc | 8 - source/scale_msa.cc | 949 -------- source/scale_uv.cc | 75 - unit_test/cpu_test.cc | 34 - unit_test/testdata/mips_msa.txt | 7 - unit_test/unit_test.cc | 7 - util/cpuid.c | 9 - 42 files changed, 41 insertions(+), 8131 deletions(-) delete mode 100644 include/libyuv/macros_msa.h delete mode 100644 source/compare_msa.cc delete mode 100644 source/rotate_msa.cc delete mode 100644 source/row_msa.cc delete mode 100644 
source/scale_msa.cc delete mode 100644 unit_test/testdata/mips_msa.txt diff --git a/Android.bp b/Android.bp index 2e7dcea91..7975093d3 100644 --- a/Android.bp +++ b/Android.bp @@ -34,7 +34,6 @@ cc_library { "source/compare.cc", "source/compare_common.cc", "source/compare_gcc.cc", - "source/compare_msa.cc", "source/compare_neon.cc", "source/compare_neon64.cc", "source/convert.cc", @@ -53,13 +52,11 @@ cc_library { "source/rotate_argb.cc", "source/rotate_common.cc", "source/rotate_gcc.cc", - "source/rotate_msa.cc", "source/rotate_neon.cc", "source/rotate_neon64.cc", "source/row_any.cc", "source/row_common.cc", "source/row_gcc.cc", - "source/row_msa.cc", "source/row_neon.cc", "source/row_neon64.cc", "source/row_rvv.cc", @@ -68,7 +65,6 @@ cc_library { "source/scale_argb.cc", "source/scale_common.cc", "source/scale_gcc.cc", - "source/scale_msa.cc", "source/scale_neon.cc", "source/scale_neon64.cc", "source/scale_rgb.cc", diff --git a/Android.mk b/Android.mk index ad5e3f62a..c83bdb7ff 100644 --- a/Android.mk +++ b/Android.mk @@ -9,7 +9,6 @@ LOCAL_SRC_FILES := \ source/compare.cc \ source/compare_common.cc \ source/compare_gcc.cc \ - source/compare_msa.cc \ source/compare_neon.cc \ source/compare_neon64.cc \ source/compare_win.cc \ @@ -26,14 +25,12 @@ LOCAL_SRC_FILES := \ source/rotate_argb.cc \ source/rotate_common.cc \ source/rotate_gcc.cc \ - source/rotate_msa.cc \ source/rotate_neon.cc \ source/rotate_neon64.cc \ source/rotate_win.cc \ source/row_any.cc \ source/row_common.cc \ source/row_gcc.cc \ - source/row_msa.cc \ source/row_neon.cc \ source/row_neon64.cc \ source/row_win.cc \ @@ -42,7 +39,6 @@ LOCAL_SRC_FILES := \ source/scale_argb.cc \ source/scale_common.cc \ source/scale_gcc.cc \ - source/scale_msa.cc \ source/scale_neon.cc \ source/scale_neon64.cc \ source/scale_rgb.cc \ diff --git a/BUILD.gn b/BUILD.gn index 954ac3a66..50e012f23 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -23,7 +23,7 @@ declare_args() { config("libyuv_config") { include_dirs = [ "include" ] if 
(is_android) { - if (target_cpu == "arm" || target_cpu == "x86" || target_cpu == "mipsel") { + if (target_cpu == "arm" || target_cpu == "x86") { ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] } else { ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ] @@ -92,10 +92,6 @@ group("libyuv") { deps += [ ":libyuv_sme" ] } - if (libyuv_use_msa) { - deps += [ ":libyuv_msa" ] - } - if (libyuv_use_lsx) { deps += [ ":libyuv_lsx" ] } @@ -127,7 +123,6 @@ static_library("libyuv_internal") { "include/libyuv/convert_from_argb.h", "include/libyuv/cpu_id.h", "include/libyuv/loongson_intrinsics.h", - "include/libyuv/macros_msa.h", "include/libyuv/mjpeg_decoder.h", "include/libyuv/planar_functions.h", "include/libyuv/rotate.h", @@ -255,6 +250,7 @@ if (libyuv_use_sve) { # The -march arguments below are incompatible with libc++ modules. use_libcxx_modules = false + # SVE2 is an Armv9-A feature. cflags = [ "-march=armv9-a+i8mm+sve2" ] } @@ -274,27 +270,12 @@ if (libyuv_use_sme) { # The -march arguments below are incompatible with libc++ modules. use_libcxx_modules = false + # SME is an Armv9-A feature. 
cflags = [ "-march=armv9-a+i8mm+sme" ] } } -if (libyuv_use_msa) { - static_library("libyuv_msa") { - sources = [ - # MSA Source Files - "source/compare_msa.cc", - "source/rotate_msa.cc", - "source/row_msa.cc", - "source/scale_msa.cc", - ] - - deps = [ ":libyuv_internal" ] - - public_configs = [ ":libyuv_config" ] - } -} - if (libyuv_use_lsx) { static_library("libyuv_lsx") { sources = [ diff --git a/CMakeLists.txt b/CMakeLists.txt index c6cce8a60..77e0527fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,6 @@ set ( ly_common_source_files ${ly_src_dir}/compare.cc ${ly_src_dir}/compare_common.cc ${ly_src_dir}/compare_gcc.cc - ${ly_src_dir}/compare_msa.cc ${ly_src_dir}/compare_win.cc ${ly_src_dir}/convert_argb.cc ${ly_src_dir}/convert.cc @@ -42,14 +41,12 @@ set ( ly_common_source_files ${ly_src_dir}/rotate_common.cc ${ly_src_dir}/rotate_gcc.cc ${ly_src_dir}/rotate_lsx.cc - ${ly_src_dir}/rotate_msa.cc ${ly_src_dir}/rotate_win.cc ${ly_src_dir}/row_any.cc ${ly_src_dir}/row_common.cc ${ly_src_dir}/row_gcc.cc ${ly_src_dir}/row_lasx.cc ${ly_src_dir}/row_lsx.cc - ${ly_src_dir}/row_msa.cc ${ly_src_dir}/row_rvv.cc ${ly_src_dir}/row_win.cc ${ly_src_dir}/scale_any.cc @@ -58,7 +55,6 @@ set ( ly_common_source_files ${ly_src_dir}/scale_common.cc ${ly_src_dir}/scale_gcc.cc ${ly_src_dir}/scale_lsx.cc - ${ly_src_dir}/scale_msa.cc ${ly_src_dir}/scale_rgb.cc ${ly_src_dir}/scale_rvv.cc ${ly_src_dir}/scale_uv.cc diff --git a/DEPS b/DEPS index 036c8625a..0cb4c3260 100644 --- a/DEPS +++ b/DEPS @@ -1532,13 +1532,6 @@ hooks = [ 'action': ['python3', 'src/build/linux/sysroot_scripts/install-sysroot.py', '--arch=x86'], }, - { - 'name': 'sysroot_mips', - 'pattern': '.', - 'condition': 'checkout_linux and checkout_mips', - 'action': ['python3', 'src/build/linux/sysroot_scripts/install-sysroot.py', - '--arch=mips'], - }, { 'name': 'sysroot_x64', 'pattern': '.', diff --git a/README.chromium b/README.chromium index 6ad6e8073..9884e2f3a 100644 --- a/README.chromium +++ b/README.chromium 
@@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1920 +Version: 1921 License: BSD-3-Clause License File: LICENSE Shipped: yes diff --git a/docs/deprecated_builds.md b/docs/deprecated_builds.md index 01863e4fd..747260ee8 100644 --- a/docs/deprecated_builds.md +++ b/docs/deprecated_builds.md @@ -141,12 +141,6 @@ ia32 GYP_DEFINES="OS=android target_arch=ia32 android_full_debug=1" GYP_CROSSCOMPILE=1 ./gyp_libyuv ninja -j7 -C out/Debug yuv_unittest_apk -mipsel - - GYP_DEFINES="OS=android target_arch=mipsel" GYP_CROSSCOMPILE=1 ./gyp_libyuv - ninja -j7 -C out/Debug yuv_unittest_apk - ninja -j7 -C out/Release yuv_unittest_apk - arm32 disassembly: llvm-objdump -d out/Release/obj/source/libyuv.row_neon.o @@ -179,16 +173,6 @@ Running test with C code: gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true" ninja -C out/Official -#### Building mips with GN - -mipsel - gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false" - ninja -C out/Default - -mips64el - gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false" - ninja -C out/Default - ### Linux GYP_DEFINES="target_arch=x64" ./gyp_libyuv diff --git a/docs/environment_variables.md b/docs/environment_variables.md index 173c207a2..3905d65cc 100644 --- a/docs/environment_variables.md +++ b/docs/environment_variables.md @@ -43,10 +43,6 @@ By default the cpu is detected and the most advanced form of SIMD is used. 
But LIBYUV_DISABLE_SVE2 LIBYUV_DISABLE_SME -## MIPS CPUs - - LIBYUV_DISABLE_MSA - ## LOONGARCH CPUs LIBYUV_DISABLE_LSX diff --git a/docs/getting_started.md b/docs/getting_started.md index 756a5ec7a..1ecfe438e 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -116,13 +116,6 @@ ia32 ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Release libyuv_unittest -mips - - gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true" - gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true" - ninja -v -C out/Debug libyuv_unittest - ninja -v -C out/Release libyuv_unittest - arm disassembly: llvm-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt @@ -162,13 +155,6 @@ Running test with C code: ninja -v -C out/Debug libyuv_unittest ninja -v -C out/Release libyuv_unittest -### MIPS Linux - - gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" is_component_build=false use_sysroot=false use_gold=false" - gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" is_component_build=false use_sysroot=false use_gold=false" - ninja -v -C out/Debug libyuv_unittest - ninja -v -C out/Release libyuv_unittest - ## Building the Library with make ### Linux diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h index ec0e331cf..a08734e90 100644 --- a/include/libyuv/compare_row.h +++ b/include/libyuv/compare_row.h @@ -65,11 +65,6 @@ extern "C" { #define HAS_SUMSQUAREERROR_NEON_DOTPROD #endif -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#define HAS_HAMMINGDISTANCE_MSA -#define HAS_SUMSQUAREERROR_MSA -#endif - uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count); @@ -88,9 +83,6 @@ uint32_t 
HammingDistance_NEON(const uint8_t* src_a, uint32_t HammingDistance_NEON_DotProd(const uint8_t* src_a, const uint8_t* src_b, int count); -uint32_t HammingDistance_MSA(const uint8_t* src_a, - const uint8_t* src_b, - int count); uint32_t SumSquareError_C(const uint8_t* src_a, const uint8_t* src_b, int count); @@ -106,9 +98,6 @@ uint32_t SumSquareError_NEON(const uint8_t* src_a, uint32_t SumSquareError_NEON_DotProd(const uint8_t* src_a, const uint8_t* src_b, int count); -uint32_t SumSquareError_MSA(const uint8_t* src_a, - const uint8_t* src_b, - int count); uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed); uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed); diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 8ce319e05..bdb81174c 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -60,10 +60,6 @@ static const int kCpuHasAVXVNNI = 0x4000000; static const int kCpuHasAVXVNNIINT8 = 0x8000000; static const int kCpuHasAMXINT8 = 0x10000000; -// These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x10; -static const int kCpuHasMSA = 0x100; - // These flags are only valid on LOONGARCH processors. static const int kCpuHasLOONGARCH = 0x20; static const int kCpuHasLSX = 0x100; @@ -91,8 +87,6 @@ static __inline int TestCpuFlag(int test_flag) { LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); LIBYUV_API -int MipsCpuCaps(const char* cpuinfo_name); -LIBYUV_API int RiscvCpuCaps(const char* cpuinfo_name); #ifdef __linux__ diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h deleted file mode 100644 index 6434a4da0..000000000 --- a/include/libyuv/macros_msa.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright 2016 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ -#define INCLUDE_LIBYUV_MACROS_MSA_H_ - -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#include <msa.h> -#include <stdint.h> - -#if (__mips_isa_rev >= 6) -#define LW(psrc) \ - ({ \ - const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ - uint32_t val_m; \ - asm("lw %[val_m], %[psrc_lw_m] \n" \ - : [val_m] "=r"(val_m) \ - : [psrc_lw_m] "m"(*psrc_lw_m)); \ - val_m; \ - }) - -#if (__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ - uint64_t val_m = 0; \ - asm("ld %[val_m], %[psrc_ld_m] \n" \ - : [val_m] "=r"(val_m) \ - : [psrc_ld_m] "m"(*psrc_ld_m)); \ - val_m; \ - }) -#else // !(__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ - uint32_t val0_m, val1_m; \ - uint64_t val_m = 0; \ - val0_m = LW(psrc_ld_m); \ - val1_m = LW(psrc_ld_m + 4); \ - val_m = (uint64_t)(val1_m); /* NOLINT */ \ - val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ - val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ - val_m; \ - }) -#endif // (__mips == 64) - -#define SW(val, pdst) \ - ({ \ - uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ - uint32_t val_m = (val); \ - asm("sw %[val_m], %[pdst_sw_m] \n" \ - : [pdst_sw_m] "=m"(*pdst_sw_m) \ - : [val_m] "r"(val_m)); \ - }) - -#if (__mips == 64) -#define SD(val, pdst) \ - ({ \ - uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ - uint64_t val_m = (val); \ - asm("sd %[val_m], %[pdst_sd_m] \n" \ - : [pdst_sd_m] "=m"(*pdst_sd_m) \ - : [val_m] "r"(val_m)); \ - }) -#else // !(__mips == 64) -#define SD(val, pdst) \ - ({ \ - uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ - uint32_t val0_m, val1_m; \ - val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ - val1_m = (uint32_t)(((val) >> 32) & 
0x00000000FFFFFFFF); \ - SW(val0_m, pdst_sd_m); \ - SW(val1_m, pdst_sd_m + 4); \ - }) -#endif // !(__mips == 64) -#else // !(__mips_isa_rev >= 6) -#define LW(psrc) \ - ({ \ - uint8_t* psrc_lw_m = (uint8_t*)(psrc); \ - uint32_t val_lw_m; \ - \ - asm("lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \ - "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \ - \ - : [val_lw_m] "=&r"(val_lw_m) \ - : [psrc_lw_m] "r"(psrc_lw_m)); \ - \ - val_lw_m; \ - }) - -#if (__mips == 64) -#define LD(psrc) \ - ({ \ - uint8_t* psrc_ld_m = (uint8_t*)(psrc); \ - uint64_t val_ld_m = 0; \ - \ - asm("ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \ - "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \ - \ - : [val_ld_m] "=&r"(val_ld_m) \ - : [psrc_ld_m] "r"(psrc_ld_m)); \ - \ - val_ld_m; \ - }) -#else // !(__mips == 64) -#define LD(psrc) \ - ({ \ - const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ - uint32_t val0_m, val1_m; \ - uint64_t val_m = 0; \ - val0_m = LW(psrc_ld_m); \ - val1_m = LW(psrc_ld_m + 4); \ - val_m = (uint64_t)(val1_m); /* NOLINT */ \ - val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ - val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ - val_m; \ - }) -#endif // (__mips == 64) - -#define SW(val, pdst) \ - ({ \ - uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ - uint32_t val_m = (val); \ - asm("usw %[val_m], %[pdst_sw_m] \n" \ - : [pdst_sw_m] "=m"(*pdst_sw_m) \ - : [val_m] "r"(val_m)); \ - }) - -#define SD(val, pdst) \ - ({ \ - uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ - uint32_t val0_m, val1_m; \ - val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ - val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ - SW(val0_m, pdst_sd_m); \ - SW(val1_m, pdst_sd_m + 4); \ - }) -#endif // (__mips_isa_rev >= 6) - -// TODO(fbarchard): Consider removing __VAR_ARGS versions. -#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ -#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__) - -#define LD_H(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ -#define LD_UH(...) 
LD_H(const v8u16, __VA_ARGS__) - -#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ -#define ST_UB(...) ST_B(v16u8, __VA_ARGS__) - -#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ -#define ST_UH(...) ST_H(v8u16, __VA_ARGS__) - -/* Description : Load two vectors with 16 'byte' sized elements - Arguments : Inputs - psrc, stride - Outputs - out0, out1 - Return Type - as per RTYPE - Details : Load 16 byte elements in 'out0' from (psrc) - Load 16 byte elements in 'out1' from (psrc + stride) -*/ -#define LD_B2(RTYPE, psrc, stride, out0, out1) \ - { \ - out0 = LD_B(RTYPE, (psrc)); \ - out1 = LD_B(RTYPE, (psrc) + stride); \ - } -#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__) - -#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ - { \ - LD_B2(RTYPE, (psrc), stride, out0, out1); \ - LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ - } -#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__) - -/* Description : Store two vectors with stride each having 16 'byte' sized - elements - Arguments : Inputs - in0, in1, pdst, stride - Details : Store 16 byte elements from 'in0' to (pdst) - Store 16 byte elements from 'in1' to (pdst + stride) -*/ -#define ST_B2(RTYPE, in0, in1, pdst, stride) \ - { \ - ST_B(RTYPE, in0, (pdst)); \ - ST_B(RTYPE, in1, (pdst) + stride); \ - } -#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) - -#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ - { \ - ST_B2(RTYPE, in0, in1, (pdst), stride); \ - ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ - } -#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) - -/* Description : Store vectors of 8 halfword elements with stride - Arguments : Inputs - in0, in1, pdst, stride - Details : Store 8 halfword elements from 'in0' to (pdst) - Store 8 halfword elements from 'in1' to (pdst + stride) -*/ -#define ST_H2(RTYPE, in0, in1, pdst, stride) \ - { \ - ST_H(RTYPE, in0, (pdst)); \ - ST_H(RTYPE, in1, (pdst) + stride); \ - } -#define ST_UH2(...) 
ST_H2(v8u16, __VA_ARGS__) - -// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. -/* Description : Shuffle byte vector elements as per mask vector - Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 - Outputs - out0, out1 - Return Type - as per RTYPE - Details : Byte elements from 'in0' & 'in1' are copied selectively to - 'out0' as per control vector 'mask0' -*/ -#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ - { \ - out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ - out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ - } -#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) - -/* Description : Interleave both left and right half of input vectors - Arguments : Inputs - in0, in1 - Outputs - out0, out1 - Return Type - as per RTYPE - Details : Right half of byte elements from 'in0' and 'in1' are - interleaved and written to 'out0' -*/ -#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ - { \ - out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ - out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ - } -#define ILVRL_B2_UB(...) 
ILVRL_B2(v16u8, __VA_ARGS__) - -#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ - -#endif // INCLUDE_LIBYUV_MACROS_MSA_H_ diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index c00d83c69..49f8a44bf 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -60,11 +60,6 @@ extern "C" { #define HAS_TRANSPOSEUVWXH_SME #endif -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#define HAS_TRANSPOSEWX16_MSA -#define HAS_TRANSPOSEUVWX16_MSA -#endif - #if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) #define HAS_TRANSPOSEWX16_LSX #define HAS_TRANSPOSEUVWX16_LSX @@ -113,11 +108,6 @@ void TransposeWx8_Fast_SSSE3(const uint8_t* src, uint8_t* dst, int dst_stride, int width); -void TransposeWx16_MSA(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width); void TransposeWx16_LSX(const uint8_t* src, int src_stride, uint8_t* dst, @@ -144,11 +134,6 @@ void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, uint8_t* dst, int dst_stride, int width); -void TransposeWx16_Any_MSA(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width); void TransposeWx16_Any_LSX(const uint8_t* src, int src_stride, uint8_t* dst, @@ -200,13 +185,6 @@ void TransposeUVWxH_SME(const uint8_t* src, int dst_stride_b, int width, int height); -void TransposeUVWx16_MSA(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width); void TransposeUVWx16_LSX(const uint8_t* src, int src_stride, uint8_t* dst_a, @@ -229,13 +207,6 @@ void TransposeUVWx8_Any_NEON(const uint8_t* src, uint8_t* dst_b, int dst_stride_b, int width); -void TransposeUVWx16_Any_MSA(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width); void TransposeUVWx16_Any_LSX(const uint8_t* src, int src_stride, uint8_t* dst_a, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 
bf9121a17..d063ad5b7 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -669,91 +669,6 @@ extern "C" { #define HAS_SCALESUMSAMPLES_NEON #endif -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#define HAS_ABGRTOUVJROW_MSA -#define HAS_ABGRTOUVROW_MSA -#define HAS_ABGRTOYROW_MSA -#define HAS_ARGB1555TOARGBROW_MSA -#define HAS_ARGB1555TOUVROW_MSA -#define HAS_ARGB1555TOYROW_MSA -#define HAS_ARGB4444TOARGBROW_MSA -#define HAS_ARGBADDROW_MSA -#define HAS_ARGBATTENUATEROW_MSA -#define HAS_ARGBBLENDROW_MSA -#define HAS_ARGBCOLORMATRIXROW_MSA -#define HAS_ARGBEXTRACTALPHAROW_MSA -#define HAS_ARGBGRAYROW_MSA -#define HAS_ARGBMIRRORROW_MSA -// TODO: Re-enable once rounding behaviour is fixed. -// #define HAS_ARGBMULTIPLYROW_MSA -#define HAS_ARGBQUANTIZEROW_MSA -#define HAS_ARGBSEPIAROW_MSA -#define HAS_ARGBSETROW_MSA -#define HAS_ARGBSHADEROW_MSA -#define HAS_ARGBSHUFFLEROW_MSA -#define HAS_ARGBSUBTRACTROW_MSA -#define HAS_ARGBTOARGB1555ROW_MSA -#define HAS_ARGBTOARGB4444ROW_MSA -#define HAS_ARGBTORAWROW_MSA -#define HAS_ARGBTORGB24ROW_MSA -#define HAS_ARGBTORGB565DITHERROW_MSA -#define HAS_ARGBTORGB565ROW_MSA -#define HAS_ARGBTOUV444ROW_MSA -#define HAS_ARGBTOUVJROW_MSA -#define HAS_ARGBTOUVROW_MSA -#define HAS_ARGBTOYJROW_MSA -#define HAS_ARGBTOYROW_MSA -#define HAS_BGRATOUVROW_MSA -#define HAS_BGRATOYROW_MSA -#define HAS_HALFFLOATROW_MSA -#define HAS_I400TOARGBROW_MSA -#define HAS_I422ALPHATOARGBROW_MSA -#define HAS_I422TOARGB1555ROW_MSA -#define HAS_I422TOARGB4444ROW_MSA -#define HAS_I422TOARGBROW_MSA -#define HAS_I422TORGB24ROW_MSA -#define HAS_I422TORGB565ROW_MSA -#define HAS_I422TORGBAROW_MSA -#define HAS_I422TOUYVYROW_MSA -#define HAS_I422TOYUY2ROW_MSA -#define HAS_I444TOARGBROW_MSA -#define HAS_INTERPOLATEROW_MSA -#define HAS_J400TOARGBROW_MSA -#define HAS_MERGEUVROW_MSA -#define HAS_MIRRORROW_MSA -#define HAS_MIRRORSPLITUVROW_MSA -#define HAS_MIRRORUVROW_MSA -#define HAS_NV12TOARGBROW_MSA -#define HAS_NV12TORGB565ROW_MSA -#define 
HAS_NV21TOARGBROW_MSA -#define HAS_RAWTOARGBROW_MSA -#define HAS_RAWTORGB24ROW_MSA -#define HAS_RAWTOUVROW_MSA -#define HAS_RAWTOYROW_MSA -#define HAS_RGB24TOARGBROW_MSA -#define HAS_RGB24TOUVROW_MSA -#define HAS_RGB24TOYROW_MSA -#define HAS_RGB565TOARGBROW_MSA -#define HAS_RGB565TOUVROW_MSA -#define HAS_RGB565TOYROW_MSA -#define HAS_RGBATOUVROW_MSA -#define HAS_RGBATOYROW_MSA -#define HAS_SETROW_MSA -#define HAS_SOBELROW_MSA -#define HAS_SOBELTOPLANEROW_MSA -#define HAS_SOBELXROW_MSA -#define HAS_SOBELXYROW_MSA -#define HAS_SOBELYROW_MSA -#define HAS_SPLITUVROW_MSA -#define HAS_UYVYTOARGBROW_MSA -#define HAS_UYVYTOUVROW_MSA -#define HAS_UYVYTOYROW_MSA -#define HAS_YUY2TOARGBROW_MSA -#define HAS_YUY2TOUV422ROW_MSA -#define HAS_YUY2TOUVROW_MSA -#define HAS_YUY2TOYROW_MSA -#endif - #if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) #define HAS_ABGRTOUVROW_LSX #define HAS_ABGRTOYROW_LSX @@ -1643,12 +1558,6 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I444ToARGBRow_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1656,12 +1565,6 @@ void I444ToARGBRow_LSX(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1674,12 +1577,6 @@ void I422ToARGBRow_LASX(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int 
width); void I422ToRGBARow_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1692,13 +1589,6 @@ void I422ToRGBARow_LASX(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - const uint8_t* src_a, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422AlphaToARGBRow_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1713,12 +1603,6 @@ void I422AlphaToARGBRow_LASX(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGB24Row_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1731,12 +1615,6 @@ void I422ToRGB24Row_LASX(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGB565Row_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1749,12 +1627,6 @@ void I422ToRGB565Row_LASX(const uint8_t* src_y, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGB4444Row_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1767,12 +1639,6 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const 
uint8_t* src_v, - uint8_t* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGB1555Row_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -1785,29 +1651,6 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToRGB565Row_MSA(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width); -void NV21ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_LSX(const uint8_t* src_y, const uint8_t* src_uv, @@ -1912,8 +1755,6 @@ void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width); void ABGRToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width); void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width); -void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); @@ -1958,15 +1799,6 @@ void ARGBToUVRow_SME(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUV444Row_MSA(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* 
dst_v, - int width); -void ARGBToUVRow_MSA(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void ARGBToUVRow_LSX(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -2120,51 +1952,6 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ABGRToUVJRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void BGRAToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ABGRToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGBAToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGB24ToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RAWToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void BGRAToUVRow_LSX(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, @@ -2260,13 +2047,6 @@ void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width); void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width); -void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); -void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); -void 
RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); -void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); -void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); -void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); -void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width); @@ -2357,17 +2137,6 @@ void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr, void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -2537,15 +2306,6 @@ void ARGBToUVRow_Any_SME(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void ARGBToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -2699,46 
+2459,6 @@ void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RAWToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void ABGRToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -2929,7 +2649,6 @@ void ARGBToUVJ444Row_C(const uint8_t* src_argb, void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); -void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); @@ -2937,20 +2656,17 @@ void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_NEON(const uint8_t* 
src_ptr, uint8_t* dst_ptr, int width); -void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); -void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -2962,10 +2678,6 @@ void MirrorSplitUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); -void MirrorSplitUVRow_MSA(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void MirrorSplitUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, @@ -2980,7 +2692,6 @@ void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width); void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* 
dst, int width); void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); @@ -2993,7 +2704,6 @@ void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr, void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -3029,10 +2739,6 @@ void SplitUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); -void SplitUVRow_MSA(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void SplitUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, @@ -3053,10 +2759,6 @@ void SplitUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void SplitUVRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void SplitUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -3198,10 +2900,6 @@ void MergeUVRow_SME(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); -void MergeUVRow_MSA(const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uv, - int width); void MergeUVRow_LSX(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, @@ -3226,10 +2924,6 @@ void MergeUVRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void MergeUVRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void MergeUVRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -3939,7 +3633,6 @@ void CopyRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width); void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width); void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width); void CopyRow_SME(const uint8_t* src, uint8_t* dst, 
int width); -void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count); void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int count); void CopyRow_C(const uint8_t* src, uint8_t* dst, int count); void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -3969,9 +3662,6 @@ void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width); -void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, - uint8_t* dst_a, - int width); void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb, uint8_t* dst_a, int width); @@ -3987,9 +3677,6 @@ void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr, void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4006,7 +3693,6 @@ void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, int width); void SetRow_C(uint8_t* dst, uint8_t v8, int width); -void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); void SetRow_X86(uint8_t* dst, uint8_t v8, int width); void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width); void SetRow_NEON(uint8_t* dst, uint8_t v8, int width); @@ -4019,8 +3705,6 @@ void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width); void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width); -void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width); -void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width); void ARGBSetRow_LSX(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_Any_LSX(uint8_t* dst_ptr, uint32_t v32, int width); @@ -4041,10 +3725,6 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); -void 
ARGBShuffleRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width); void ARGBShuffleRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, @@ -4065,10 +3745,6 @@ void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); -void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const uint8_t* param, - int width); void ARGBShuffleRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, @@ -4104,7 +3780,6 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, void RGB24ToARGBRow_SVE2(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); -void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_argb, @@ -4114,22 +3789,17 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_SVE2(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGBARow_SVE2(const uint8_t* src_raw, uint8_t* dst_rgba, int width); -void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_SVE2(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); -void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_RVV(const uint8_t* 
src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); -void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, - uint8_t* dst_argb, - int width); void RGB565ToARGBRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); @@ -4142,9 +3812,6 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, void ARGB1555ToARGBRow_SVE2(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); -void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, - uint8_t* dst_argb, - int width); void ARGB1555ToARGBRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); @@ -4154,9 +3821,6 @@ void ARGB1555ToARGBRow_LASX(const uint8_t* src_argb1555, void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); -void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, - uint8_t* dst_argb, - int width); void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); @@ -4215,9 +3879,6 @@ void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr, void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void RGB24ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4226,20 +3887,15 @@ void RGB24ToARGBRow_Any_LASX(const uint8_t* src_ptr, int width); void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGBARow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void 
RAWToRGB24Row_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void RGB565ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4249,9 +3905,6 @@ void RGB565ToARGBRow_Any_LASX(const uint8_t* src_ptr, void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4262,9 +3915,6 @@ void ARGB1555ToARGBRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void ARGB4444ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4336,19 +3986,6 @@ void ARGBToRGB565DitherRow_SVE2(const uint8_t* src_argb, uint8_t* dst_rgb, uint32_t dither4, int width); -void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); -void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); -void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); -void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width); -void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width); -void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, - uint8_t* dst_rgb, - uint32_t dither4, - int width); void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, uint32_t dither4, @@ -4470,7 +4107,6 @@ void AB64ToARGBRow_Any_NEON(const uint16_t* src_ptr, void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int 
width); -void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); @@ -4483,7 +4119,6 @@ void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void J400ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I444ToARGBRow_C(const uint8_t* src_y, @@ -5402,10 +5037,6 @@ void I400ToARGBRow_SME(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I400ToARGBRow_MSA(const uint8_t* src_y, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, @@ -5426,10 +5057,6 @@ void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* param, int width); -void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I400ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, @@ -5444,10 +5071,6 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBBlendRow_MSA(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); void ARGBBlendRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, @@ -5527,14 +5150,6 @@ void ARGBMultiplyRow_SME(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); 
-void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void ARGBMultiplyRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, @@ -5581,14 +5196,6 @@ void ARGBAddRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBAddRow_MSA(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); -void ARGBAddRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void ARGBAddRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, @@ -5636,14 +5243,6 @@ void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBSubtractRow_MSA(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); -void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void ARGBSubtractRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, @@ -5731,23 +5330,6 @@ void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); -void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const uint32_t param, - int width); void ARGBToRGB565DitherRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, @@ -6026,24 +5608,12 @@ void P410ToAR30Row_Any_NEON(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* 
yuvconstants, int width); -void I444ToARGBRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I444ToARGBRow_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6056,12 +5626,6 @@ void I422ToARGBRow_Any_LASX(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6074,13 +5638,6 @@ void I422ToRGBARow_Any_LASX(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - const uint8_t* a_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422AlphaToARGBRow_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6095,12 +5652,6 @@ void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGB24Row_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6113,12 +5664,6 @@ void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf, uint8_t* 
dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGB565Row_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6131,12 +5676,6 @@ void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGB4444Row_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6149,12 +5688,6 @@ void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGB1555Row_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6167,29 +5700,6 @@ void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); -void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); -void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); -void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const 
struct YuvConstants* yuvconstants, - int width); void NV12ToARGBRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, @@ -6272,14 +5782,8 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); -void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, - int src_stride_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void YUY2ToUVRow_LSX(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, @@ -6290,10 +5794,6 @@ void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, @@ -6358,14 +5858,8 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void YUY2ToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -6376,10 +5870,6 @@ void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void YUY2ToUV422Row_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -6428,14 +5918,8 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* 
dst_y, int width); void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); -void UYVYToUVRow_MSA(const uint8_t* src_uyvy, - int src_stride_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void UYVYToUVRow_LSX(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, @@ -6446,10 +5930,6 @@ void UYVYToUVRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void UYVYToUV422Row_LSX(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, @@ -6499,14 +5979,8 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void UYVYToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, @@ -6517,10 +5991,6 @@ void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void UYVYToUV422Row_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -6650,11 +6120,6 @@ void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); -void I422ToYUY2Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_yuy2, - int width); void I422ToYUY2Row_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -6665,11 +6130,6 @@ void I422ToYUY2Row_LASX(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_yuy2, int width); 
-void I422ToUYVYRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uyvy, - int width); void I422ToUYVYRow_LSX(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -6680,11 +6140,6 @@ void I422ToUYVYRow_LASX(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_uyvy, int width); -void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - int width); void I422ToYUY2Row_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6695,11 +6150,6 @@ void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); -void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - int width); void I422ToUYVYRow_Any_LSX(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -6722,9 +6172,6 @@ void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBAttenuateRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - int width); void ARGBAttenuateRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width); @@ -6743,9 +6190,6 @@ void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void ARGBAttenuateRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -6777,7 +6221,6 @@ void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON_DotProd(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); @@ 
-6785,7 +6228,6 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON_DotProd(uint8_t* dst_argb, int width); -void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width); void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width); @@ -6805,10 +6247,6 @@ void ARGBColorMatrixRow_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); -void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - const int8_t* matrix_argb, - int width); void ARGBColorMatrixRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, @@ -6843,11 +6281,6 @@ void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int interval_size, int interval_offset, int width); -void ARGBQuantizeRow_MSA(uint8_t* dst_argb, - int scale, - int interval_size, - int interval_offset, - int width); void ARGBQuantizeRow_LSX(uint8_t* dst_argb, int scale, int interval_size, @@ -6866,10 +6299,6 @@ void ARGBShadeRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); -void ARGBShadeRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - int width, - uint32_t value); void ARGBShadeRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width, @@ -6941,11 +6370,6 @@ void InterpolateRow_SME(uint8_t* dst_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction); -void InterpolateRow_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction); void InterpolateRow_LSX(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -6971,11 +6395,6 @@ void InterpolateRow_Any_AVX2(uint8_t* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride_ptr, - int width, - int source_y_fraction); void 
InterpolateRow_Any_LSX(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, @@ -7056,11 +6475,6 @@ void SobelXRow_NEON(const uint8_t* src_y0, const uint8_t* src_y2, uint8_t* dst_sobelx, int width); -void SobelXRow_MSA(const uint8_t* src_y0, - const uint8_t* src_y1, - const uint8_t* src_y2, - uint8_t* dst_sobelx, - int width); void SobelYRow_C(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, @@ -7073,10 +6487,6 @@ void SobelYRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width); -void SobelYRow_MSA(const uint8_t* src_y0, - const uint8_t* src_y1, - uint8_t* dst_sobely, - int width); void SobelRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, @@ -7089,10 +6499,6 @@ void SobelRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); -void SobelRow_MSA(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_argb, - int width); void SobelRow_LSX(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, @@ -7109,10 +6515,6 @@ void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); -void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_y, - int width); void SobelToPlaneRow_LSX(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, @@ -7129,10 +6531,6 @@ void SobelXYRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); -void SobelXYRow_MSA(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_argb, - int width); void SobelXYRow_LSX(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, @@ -7145,10 +6543,6 @@ void SobelRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void SobelRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void SobelRow_Any_LSX(const 
uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -7161,10 +6555,6 @@ void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void SobelToPlaneRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -7177,10 +6567,6 @@ void SobelXYRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void SobelXYRow_Any_MSA(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); void SobelXYRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -7249,14 +6635,6 @@ void HalfFloat1Row_SVE2(const uint16_t* src, uint16_t* dst, float scale, int width); -void HalfFloatRow_MSA(const uint16_t* src, - uint16_t* dst, - float scale, - int width); -void HalfFloatRow_Any_MSA(const uint16_t* src_ptr, - uint16_t* dst_ptr, - float param, - int width); void HalfFloatRow_LSX(const uint16_t* src, uint16_t* dst, float scale, diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index eef68eb4b..86efcbfca 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -136,19 +136,6 @@ extern "C" { #define HAS_SCALEUVROWDOWN2_SME #endif -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#define HAS_SCALEADDROW_MSA -#define HAS_SCALEARGBCOLS_MSA -#define HAS_SCALEARGBFILTERCOLS_MSA -#define HAS_SCALEARGBROWDOWN2_MSA -#define HAS_SCALEARGBROWDOWNEVEN_MSA -#define HAS_SCALEFILTERCOLS_MSA -#define HAS_SCALEROWDOWN2_MSA -#define HAS_SCALEROWDOWN34_MSA -#define HAS_SCALEROWDOWN38_MSA -#define HAS_SCALEROWDOWN4_MSA -#endif - #if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) #define HAS_SCALEARGBROWDOWN2_LSX #define HAS_SCALEARGBROWDOWNEVEN_LSX @@ -269,17 +256,12 @@ enum FilterMode ScaleFilterReduce(int src_width, // Divide num by div and return as 16.16 fixed point result. 
int FixedDiv_C(int num, int div); int FixedDiv_X86(int num, int div); -int FixedDiv_MIPS(int num, int div); // Divide num - 1 by div - 1 and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div); int FixedDiv1_X86(int num, int div); -int FixedDiv1_MIPS(int num, int div); #ifdef HAS_FIXEDDIV_X86 #define FixedDiv FixedDiv_X86 #define FixedDiv1 FixedDiv1_X86 -#elif defined HAS_FIXEDDIV_MIPS -#define FixedDiv FixedDiv_MIPS -#define FixedDiv1 FixedDiv1_MIPS #else #define FixedDiv FixedDiv_C #define FixedDiv1 FixedDiv1_C @@ -935,26 +917,6 @@ void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr, int dst_width, int x, int dx); -void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx); -void ScaleARGBCols_MSA(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx); -void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx); -void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx); void ScaleARGBFilterCols_RVV(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, @@ -1010,18 +972,6 @@ void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width); -void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width); -void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width); void ScaleARGBRowDown2_LSX(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, @@ -1058,18 +1008,6 @@ void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int 
dst_width); -void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); void ScaleARGBRowDown2_Any_LSX(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, @@ -1102,16 +1040,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, int src_stepx, uint8_t* dst_argb, int dst_width); -void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - int32_t src_stepx, - uint8_t* dst_argb, - int dst_width); -void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_argb, - int dst_width); void ScaleARGBRowDownEven_LSX(const uint8_t* src_argb, ptrdiff_t src_stride, int32_t src_stepx, @@ -1152,16 +1080,6 @@ void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, int src_stepx, uint8_t* dst_ptr, int dst_width); -void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - int32_t src_stepx, - uint8_t* dst_ptr, - int dst_width); -void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_ptr, - int dst_width); void ScaleARGBRowDownEven_Any_LSX(const uint8_t* src_ptr, ptrdiff_t src_stride, int32_t src_stepx, @@ -1214,18 +1132,6 @@ void ScaleUVRowDown2Box_SME(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); -void ScaleUVRowDown2_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_uv, - int dst_width); -void ScaleUVRowDown2Linear_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_uv, - int dst_width); -void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_uv, - int dst_width); void ScaleUVRowDown2_RVV(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_uv, @@ -1266,18 +1172,6 @@ void ScaleUVRowDown2Box_Any_NEON(const 
uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); -void ScaleUVRowDown2_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleUVRowDown2Linear_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleUVRowDown2Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); void ScaleUVRowDownEven_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, @@ -1308,16 +1202,6 @@ void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr, int32_t src_stepx, uint8_t* dst_uv, int dst_width); -void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - int32_t src_stepx, - uint8_t* dst_uv, - int dst_width); -void ScaleUVRowDownEvenBox_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_uv, - int dst_width); void ScaleUVRowDownEven_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, @@ -1338,16 +1222,6 @@ void ScaleUVRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, int src_stepx, uint8_t* dst_ptr, int dst_width); -void ScaleUVRowDownEven_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - int32_t src_stepx, - uint8_t* dst_ptr, - int dst_width); -void ScaleUVRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_ptr, - int dst_width); void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -1664,110 +1538,6 @@ void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr, int x, int dx); -void ScaleRowDown2_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width); -void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width); -void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width); -void ScaleRowDown4_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* 
dst, - int dst_width); -void ScaleRowDown4Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width); -void ScaleRowDown38_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width); -void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); -void ScaleFilterCols_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx); -void ScaleRowDown34_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width); -void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* d, - int dst_width); -void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* d, - int dst_width); - -void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleAddRow_Any_MSA(const uint8_t* src_ptr, - uint16_t* 
dst_ptr, - int src_width); -void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx); -void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); -void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width); - void ScaleRowDown2_LSX(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, diff --git a/libyuv.gyp b/libyuv.gyp index e853ba319..394840216 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -26,20 +26,13 @@ 'clang%': 0, # Link-Time Optimizations. 'use_lto%': 0, - 'mips_msa%': 0, # Default to msa off. 'build_neon': 0, - 'build_msa': 0, 'conditions': [ ['(target_arch == "armv7" or target_arch == "armv7s" or \ (target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\ and (arm_neon == 1 or arm_neon_optional == 1)', { 'build_neon': 1, }], - ['(target_arch == "mipsel" or target_arch == "mips64el")\ - and (mips_msa == 1)', - { - 'build_msa': 1, - }], ], }, @@ -87,11 +80,6 @@ }], ], }], - ['build_msa != 0', { - 'defines': [ - 'LIBYUV_MSA', - ], - }], ['OS != "ios" and libyuv_disable_jpeg != 1', { 'defines': [ 'HAVE_JPEG' diff --git a/libyuv.gypi b/libyuv.gypi index b46967979..44b127410 100644 --- a/libyuv.gypi +++ b/libyuv.gypi @@ -21,7 +21,6 @@ 'include/libyuv/cpu_id.h', 'include/libyuv/cpu_support.h', 'include/libyuv/loongson_intrinsics.h', - 'include/libyuv/macros_msa.h', 'include/libyuv/mjpeg_decoder.h', 'include/libyuv/planar_functions.h', 'include/libyuv/rotate.h', @@ -40,7 +39,6 @@ 'source/compare.cc', 'source/compare_common.cc', 'source/compare_gcc.cc', - 'source/compare_msa.cc', 'source/compare_neon.cc', 'source/compare_neon64.cc', 'source/compare_win.cc', @@ -61,14 +59,12 @@ 'source/rotate_common.cc', 'source/rotate_gcc.cc', 'source/rotate_lsx.cc', - 
'source/rotate_msa.cc', 'source/rotate_neon.cc', 'source/rotate_neon64.cc', 'source/rotate_win.cc', 'source/row_any.cc', 'source/row_common.cc', 'source/row_gcc.cc', - 'source/row_msa.cc', 'source/row_lasx.cc', 'source/row_lsx.cc', 'source/row_neon.cc', @@ -80,7 +76,6 @@ 'source/scale_common.cc', 'source/scale_gcc.cc', 'source/scale_lsx.cc', - 'source/scale_msa.cc', 'source/scale_neon.cc', 'source/scale_neon64.cc', 'source/scale_rgb.cc', diff --git a/linux.mk b/linux.mk index d19a888ab..6999810da 100644 --- a/linux.mk +++ b/linux.mk @@ -13,7 +13,6 @@ LOCAL_OBJ_FILES := \ source/compare.o \ source/compare_common.o \ source/compare_gcc.o \ - source/compare_msa.o \ source/compare_neon.o \ source/compare_neon64.o \ source/compare_win.o \ @@ -34,7 +33,6 @@ LOCAL_OBJ_FILES := \ source/rotate_common.o \ source/rotate_gcc.o \ source/rotate_lsx.o \ - source/rotate_msa.o \ source/rotate_neon.o \ source/rotate_neon64.o \ source/rotate_win.o \ @@ -43,7 +41,6 @@ LOCAL_OBJ_FILES := \ source/row_gcc.o \ source/row_lasx.o \ source/row_lsx.o \ - source/row_msa.o \ source/row_neon.o \ source/row_neon64.o \ source/row_rvv.o \ @@ -54,7 +51,6 @@ LOCAL_OBJ_FILES := \ source/scale_common.o \ source/scale_gcc.o \ source/scale_lsx.o \ - source/scale_msa.o \ source/scale_neon.o \ source/scale_neon64.o \ source/scale_rgb.o \ diff --git a/source/compare.cc b/source/compare.cc index 3ce4cfea9..e85cc6d07 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -159,11 +159,6 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a, HammingDistance = HammingDistance_AVX2; } #endif -#if defined(HAS_HAMMINGDISTANCE_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - HammingDistance = HammingDistance_MSA; - } -#endif #ifdef _OPENMP #pragma omp parallel for reduction(+ : diff) @@ -221,11 +216,6 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a, SumSquareError = SumSquareError_AVX2; } #endif -#if defined(HAS_SUMSQUAREERROR_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SumSquareError = SumSquareError_MSA; - } 
-#endif #ifdef _OPENMP #pragma omp parallel for reduction(+ : sse) #endif diff --git a/source/compare_msa.cc b/source/compare_msa.cc deleted file mode 100644 index 0b807d37b..000000000 --- a/source/compare_msa.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2017 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" - -#include "libyuv/compare_row.h" -#include "libyuv/row.h" - -// This module is for GCC MSA -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#include "libyuv/macros_msa.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -uint32_t HammingDistance_MSA(const uint8_t* src_a, - const uint8_t* src_b, - int count) { - uint32_t diff = 0u; - int i; - v16u8 src0, src1, src2, src3; - v2i64 vec0 = {0}, vec1 = {0}; - - for (i = 0; i < count; i += 32) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); - src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); - src0 ^= src2; - src1 ^= src3; - vec0 += __msa_pcnt_d((v2i64)src0); - vec1 += __msa_pcnt_d((v2i64)src1); - src_a += 32; - src_b += 32; - } - - vec0 += vec1; - diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0); - diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2); - return diff; -} - -uint32_t SumSquareError_MSA(const uint8_t* src_a, - const uint8_t* src_b, - int count) { - uint32_t sse = 0u; - int i; - v16u8 src0, src1, src2, src3; - v8i16 vec0, vec1, vec2, vec3; - v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0}; - v2i64 tmp0; - - for (i = 0; i < count; i += 32) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); - src1 = 
(v16u8)__msa_ld_b((v16i8*)src_a, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); - src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); - vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); - vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); - vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); - vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); - vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); - vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); - vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); - vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); - reg0 = __msa_dpadd_s_w(reg0, vec0, vec0); - reg1 = __msa_dpadd_s_w(reg1, vec1, vec1); - reg2 = __msa_dpadd_s_w(reg2, vec2, vec2); - reg3 = __msa_dpadd_s_w(reg3, vec3, vec3); - src_a += 32; - src_b += 32; - } - - reg0 += reg1; - reg2 += reg3; - reg0 += reg2; - tmp0 = __msa_hadd_s_d(reg0, reg0); - sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0); - sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2); - return sse; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/source/convert.cc b/source/convert.cc index ecc01f0b2..d175bbc44 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -748,14 +748,6 @@ int I010ToNV12(const uint16_t* src_y, MergeUVRow = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow = MergeUVRow_Any_LSX; @@ -1196,14 +1188,6 @@ int I422ToNV21(const uint8_t* src_y, MergeUVRow = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow = MergeUVRow_Any_LSX; @@ 
-1246,14 +1230,6 @@ int I422ToNV21(const uint8_t* src_y, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -1724,16 +1700,6 @@ int YUY2ToI420(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - YUY2ToYRow = YUY2ToYRow_Any_MSA; - YUY2ToUVRow = YUY2ToUVRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - YUY2ToYRow = YUY2ToYRow_MSA; - YUY2ToUVRow = YUY2ToUVRow_MSA; - } - } -#endif #if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { YUY2ToYRow = YUY2ToYRow_Any_LSX; @@ -1825,16 +1791,6 @@ int UYVYToI420(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_UYVYTOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - UYVYToYRow = UYVYToYRow_Any_MSA; - UYVYToUVRow = UYVYToUVRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - UYVYToYRow = UYVYToYRow_MSA; - UYVYToUVRow = UYVYToUVRow_MSA; - } - } -#endif #if defined(HAS_UYVYTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { UYVYToYRow = UYVYToYRow_Any_LSX; @@ -2143,18 +2099,6 @@ int ARGBToI420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -2337,18 +2281,6 @@ int ARGBToI420Alpha(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = 
ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -2385,12 +2317,6 @@ int ARGBToI420Alpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_NEON; } #endif -#if defined(HAS_ARGBEXTRACTALPHAROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA - : ARGBExtractAlphaRow_Any_MSA; - } -#endif #if defined(HAS_ARGBEXTRACTALPHAROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX @@ -2532,18 +2458,6 @@ int BGRAToI420(const uint8_t* src_bgra, } } #endif -#if defined(HAS_BGRATOYROW_MSA) && defined(HAS_BGRATOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - BGRAToYRow = BGRAToYRow_Any_MSA; - BGRAToUVRow = BGRAToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - BGRAToUVRow = BGRAToUVRow_MSA; - } - } -#endif #if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { BGRAToYRow = BGRAToYRow_Any_LSX; @@ -2691,16 +2605,6 @@ int ABGRToI420(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYROW_MSA) && defined(HAS_ABGRTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ABGRToYRow = ABGRToYRow_Any_MSA; - ABGRToUVRow = ABGRToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_MSA; - ABGRToUVRow = ABGRToUVRow_MSA; - } - } -#endif #if defined(HAS_ABGRTOYROW_LSX) && defined(HAS_ABGRTOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ABGRToYRow = ABGRToYRow_Any_LSX; @@ -2832,16 +2736,6 @@ int RGBAToI420(const uint8_t* src_rgba, } } #endif -#if defined(HAS_RGBATOYROW_MSA) && defined(HAS_RGBATOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGBAToYRow = RGBAToYRow_Any_MSA; - RGBAToUVRow = RGBAToUVRow_Any_MSA; - if 
(IS_ALIGNED(width, 16)) { - RGBAToYRow = RGBAToYRow_MSA; - RGBAToUVRow = RGBAToUVRow_MSA; - } - } -#endif #if defined(HAS_RGBATOYROW_LSX) && defined(HAS_RGBATOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGBAToYRow = RGBAToYRow_Any_LSX; @@ -2883,8 +2777,8 @@ int RGBAToI420(const uint8_t* src_rgba, } // Enabled if 1 pass is available -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_LSX) || defined(HAS_RGB24TOYROW_RVV)) +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_LSX) || \ + defined(HAS_RGB24TOYROW_RVV)) #define HAS_RGB24TOYROW #endif @@ -2939,16 +2833,6 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_RGB24TOYROW_MSA) && defined(HAS_RGB24TOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGB24ToUVRow = RGB24ToUVRow_Any_MSA; - RGB24ToYRow = RGB24ToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGB24ToYRow = RGB24ToYRow_MSA; - RGB24ToUVRow = RGB24ToUVRow_MSA; - } - } -#endif #if defined(HAS_RGB24TOYROW_LSX) && defined(HAS_RGB24TOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGB24ToUVRow = RGB24ToUVRow_Any_LSX; @@ -3065,8 +2949,7 @@ int RGB24ToI420(const uint8_t* src_rgb24, #undef HAS_RGB24TOYROW // Enabled if 1 pass is available -#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_RVV) +#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_RVV) #define HAS_RGB24TOYJROW #endif @@ -3121,16 +3004,6 @@ int RGB24ToJ420(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_RGB24TOYJROW_MSA) && defined(HAS_RGB24TOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGB24ToUVJRow = RGB24ToUVJRow_Any_MSA; - RGB24ToYJRow = RGB24ToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGB24ToYJRow = RGB24ToYJRow_MSA; - RGB24ToUVJRow = RGB24ToUVJRow_MSA; - } - } -#endif #if defined(HAS_RGB24TOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGB24ToYJRow = RGB24ToYJRow_Any_LSX; @@ -3243,8 +3116,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, #undef 
HAS_RGB24TOYJROW // Enabled if 1 pass is available -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_LSX) || defined(HAS_RAWTOYROW_RVV)) +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_LSX) || \ + defined(HAS_RAWTOYROW_RVV)) #define HAS_RAWTOYROW #endif @@ -3298,16 +3171,6 @@ int RAWToI420(const uint8_t* src_raw, } } #endif -#if defined(HAS_RAWTOYROW_MSA) && defined(HAS_RAWTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToUVRow = RAWToUVRow_Any_MSA; - RAWToYRow = RAWToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToYRow = RAWToYRow_MSA; - RAWToUVRow = RAWToUVRow_MSA; - } - } -#endif #if defined(HAS_RAWTOYROW_LSX) && defined(HAS_RAWTOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToUVRow = RAWToUVRow_Any_LSX; @@ -3432,8 +3295,7 @@ int RAWToI420(const uint8_t* src_raw, #undef HAS_RAWTOYROW // Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_RVV) +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_RVV) #define HAS_RAWTOYJROW #endif @@ -3488,16 +3350,6 @@ int RAWToJ420(const uint8_t* src_raw, } } #endif -#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToUVJRow = RAWToUVJRow_Any_MSA; - RAWToYJRow = RAWToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_MSA; - RAWToUVJRow = RAWToUVJRow_MSA; - } - } -#endif #if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToUVJRow = RAWToUVJRow_Any_LSX; @@ -3683,14 +3535,6 @@ int RAWToI444(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOUV444ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToUV444Row = ARGBToUV444Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_MSA; - } - } -#endif #if defined(HAS_ARGBTOUV444ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToUV444Row = ARGBToUV444Row_Any_LSX; @@ -3739,14 +3583,6 @@ int RAWToI444(const 
uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -3798,14 +3634,6 @@ int RAWToI444(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_SVE2; } #endif -#if defined(HAS_RAWTOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToARGBRow = RAWToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_MSA; - } - } -#endif #if defined(HAS_RAWTOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToARGBRow = RAWToARGBRow_Any_LSX; @@ -3910,14 +3738,6 @@ int RAWToJ444(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOUVJ444ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_MSA; - } - } -#endif #if defined(HAS_ARGBTOUVJ444ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToUVJ444Row = ARGBToUVJ444Row_Any_LSX; @@ -3966,14 +3786,6 @@ int RAWToJ444(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYJRow = ARGBToYJRow_Any_LSX; @@ -4025,14 +3837,6 @@ int RAWToJ444(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_SVE2; } #endif -#if defined(HAS_RAWTOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToARGBRow = RAWToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_MSA; - } - } -#endif #if defined(HAS_RAWTOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToARGBRow = RAWToARGBRow_Any_LSX; @@ -4089,8 +3893,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, int width, int height) { int y; -#if (defined(HAS_RGB565TOYROW_NEON) || 
defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB565ToUVRow_C; @@ -4133,17 +3937,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } #endif -// MSA version does direct RGB565 to YUV. -#if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGB565ToUVRow = RGB565ToUVRow_Any_MSA; - RGB565ToYRow = RGB565ToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGB565ToYRow = RGB565ToYRow_MSA; - RGB565ToUVRow = RGB565ToUVRow_MSA; - } - } -#endif +// LSX version does direct RGB565 to YUV. #if defined(HAS_RGB565TOYROW_LSX) && defined(HAS_RGB565TOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGB565ToUVRow = RGB565ToUVRow_Any_LSX; @@ -4214,8 +4008,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, } #endif { -#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) +#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) // Allocate 2 rows of ARGB. 
const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); @@ -4223,8 +4017,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, return 1; #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); @@ -4241,8 +4035,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); #else @@ -4251,8 +4045,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) +#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) free_aligned_buffer_64(row); #endif } @@ -4272,8 +4066,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, int width, int height) { int y; -#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB1555ToUVRow_C; 
@@ -4317,17 +4111,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } #endif -// MSA version does direct ARGB1555 to YUV. -#if defined(HAS_ARGB1555TOYROW_MSA) && defined(HAS_ARGB1555TOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA; - ARGB1555ToYRow = ARGB1555ToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToYRow = ARGB1555ToYRow_MSA; - ARGB1555ToUVRow = ARGB1555ToUVRow_MSA; - } - } -#endif +// LSX version does direct ARGB1555 to YUV. #if defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX; @@ -4399,8 +4183,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } #endif { -#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) +#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) // Allocate 2 rows of ARGB. 
const int row_size = (width * 4 + 31) & ~31; align_buffer_64(row, row_size * 2); @@ -4409,8 +4193,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, @@ -4429,8 +4213,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); #else @@ -4439,8 +4223,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) +#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) free_aligned_buffer_64(row); #endif } @@ -4519,14 +4303,6 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif -#if defined(HAS_ARGB4444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA; - } - } -#endif #if defined(HAS_ARGB4444TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LSX; @@ -4575,18 +4351,6 @@ int 
ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToUVRow = ARGBToUVRow_Any_MSA; - ARGBToYRow = ARGBToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -4710,14 +4474,6 @@ int RGB24ToJ400(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_RGB24TOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGB24ToYJRow = RGB24ToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGB24ToYJRow = RGB24ToYJRow_MSA; - } - } -#endif #if defined(HAS_RGB24TOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGB24ToYJRow = RGB24ToYJRow_Any_LSX; @@ -4799,14 +4555,6 @@ int RAWToJ400(const uint8_t* src_raw, } } #endif -#if defined(HAS_RAWTOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToYJRow = RAWToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_MSA; - } - } -#endif #if defined(HAS_RAWTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToYJRow = RAWToYJRow_Any_LSX; diff --git a/source/convert_argb.cc b/source/convert_argb.cc index e1f78c07d..751866d3f 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -119,14 +119,6 @@ int I420ToARGBMatrix(const uint8_t* src_y, I422ToARGBRow = I422ToARGBRow_SME; } #endif -#if defined(HAS_I422TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGBRow = I422ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I422TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToARGBRow = I422ToARGBRow_Any_LSX; @@ -383,14 +375,6 @@ int I422ToARGBMatrix(const uint8_t* src_y, I422ToARGBRow = I422ToARGBRow_SME; } #endif -#if defined(HAS_I422TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGBRow = I422ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - 
I422ToARGBRow = I422ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I422TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToARGBRow = I422ToARGBRow_Any_LSX; @@ -636,14 +620,6 @@ int I444ToARGBMatrix(const uint8_t* src_y, I444ToARGBRow = I444ToARGBRow_SME; } #endif -#if defined(HAS_I444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I444TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I444ToARGBRow = I444ToARGBRow_Any_LSX; @@ -881,14 +857,6 @@ int I444ToRGB24Matrix(const uint8_t* src_y, I444ToRGB24Row = I444ToRGB24Row_SME; } #endif -#if defined(HAS_I444TORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToRGB24Row = I444ToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToRGB24Row = I444ToRGB24Row_MSA; - } - } -#endif #if defined(HAS_I444TORGB24ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I444ToRGB24Row = I444ToRGB24Row_Any_LSX; @@ -2298,14 +2266,6 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, I422AlphaToARGBRow = I422AlphaToARGBRow_SME; } #endif -#if defined(HAS_I422ALPHATOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_MSA; - } - } -#endif #if defined(HAS_I422ALPHATOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX; @@ -2351,14 +2311,6 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -2467,14 +2419,6 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, I422AlphaToARGBRow = I422AlphaToARGBRow_SME; } #endif -#if 
defined(HAS_I422ALPHATOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_MSA; - } - } -#endif #if defined(HAS_I422ALPHATOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX; @@ -2520,14 +2464,6 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -2634,14 +2570,6 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, I444AlphaToARGBRow = I444AlphaToARGBRow_SME; } #endif -#if defined(HAS_I444ALPHATOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444AlphaToARGBRow = I444AlphaToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444AlphaToARGBRow = I444AlphaToARGBRow_MSA; - } - } -#endif #if defined(HAS_I444ALPHATOARGBROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { I444AlphaToARGBRow = I444AlphaToARGBRow_RVV; @@ -2671,14 +2599,6 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -2938,14 +2858,6 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ 
-3078,14 +2990,6 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -3216,14 +3120,6 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -3324,14 +3220,6 @@ int I400ToARGBMatrix(const uint8_t* src_y, I400ToARGBRow = I400ToARGBRow_SME; } #endif -#if defined(HAS_I400TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I400ToARGBRow = I400ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - I400ToARGBRow = I400ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I400TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I400ToARGBRow = I400ToARGBRow_Any_LSX; @@ -3416,14 +3304,6 @@ int J400ToARGB(const uint8_t* src_y, } } #endif -#if defined(HAS_J400TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - J400ToARGBRow = J400ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - J400ToARGBRow = J400ToARGBRow_MSA; - } - } -#endif #if defined(HAS_J400TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { J400ToARGBRow = J400ToARGBRow_Any_LSX; @@ -3771,14 +3651,6 @@ int RGB24ToARGB(const uint8_t* src_rgb24, RGB24ToARGBRow = RGB24ToARGBRow_SVE2; } #endif -#if defined(HAS_RGB24TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_MSA; - } - } -#endif #if defined(HAS_RGB24TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; @@ 
-3864,14 +3736,6 @@ int RAWToARGB(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_SVE2; } #endif -#if defined(HAS_RAWTOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToARGBRow = RAWToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_MSA; - } - } -#endif #if defined(HAS_RAWTOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToARGBRow = RAWToARGBRow_Any_LSX; @@ -4013,14 +3877,6 @@ int RGB565ToARGB(const uint8_t* src_rgb565, } } #endif -#if defined(HAS_RGB565TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGB565ToARGBRow = RGB565ToARGBRow_MSA; - } - } -#endif #if defined(HAS_RGB565TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_LSX; @@ -4101,14 +3957,6 @@ int ARGB1555ToARGB(const uint8_t* src_argb1555, ARGB1555ToARGBRow = ARGB1555ToARGBRow_SVE2; } #endif -#if defined(HAS_ARGB1555TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_MSA; - } - } -#endif #if defined(HAS_ARGB1555TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_LSX; @@ -4184,14 +4032,6 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444, } } #endif -#if defined(HAS_ARGB4444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA; - } - } -#endif #if defined(HAS_ARGB4444TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LSX; @@ -4500,14 +4340,6 @@ int NV12ToARGBMatrix(const uint8_t* src_y, NV12ToARGBRow = NV12ToARGBRow_SME; } #endif -#if defined(HAS_NV12TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - NV12ToARGBRow = NV12ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_MSA; - } - } -#endif #if defined(HAS_NV12TOARGBROW_LSX) if 
(TestCpuFlag(kCpuHasLSX)) { NV12ToARGBRow = NV12ToARGBRow_Any_LSX; @@ -4600,14 +4432,6 @@ int NV21ToARGBMatrix(const uint8_t* src_y, NV21ToARGBRow = NV21ToARGBRow_SME; } #endif -#if defined(HAS_NV21TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - NV21ToARGBRow = NV21ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_MSA; - } - } -#endif #if defined(HAS_NV21TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { NV21ToARGBRow = NV21ToARGBRow_Any_LSX; @@ -5028,14 +4852,6 @@ int YUY2ToARGBMatrix(const uint8_t* src_yuy2, YUY2ToARGBRow = YUY2ToARGBRow_SME; } #endif -#if defined(HAS_YUY2TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - YUY2ToARGBRow = YUY2ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - YUY2ToARGBRow = YUY2ToARGBRow_MSA; - } - } -#endif #if defined(HAS_YUY2TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { YUY2ToARGBRow = YUY2ToARGBRow_Any_LSX; @@ -5126,14 +4942,6 @@ int UYVYToARGBMatrix(const uint8_t* src_uyvy, UYVYToARGBRow = UYVYToARGBRow_SME; } #endif -#if defined(HAS_UYVYTOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - UYVYToARGBRow = UYVYToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - UYVYToARGBRow = UYVYToARGBRow_MSA; - } - } -#endif #if defined(HAS_UYVYTOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { UYVYToARGBRow = UYVYToARGBRow_Any_LSX; @@ -5343,14 +5151,6 @@ int I422ToRGBAMatrix(const uint8_t* src_y, I422ToRGBARow = I422ToRGBARow_SME; } #endif -#if defined(HAS_I422TORGBAROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGBARow = I422ToRGBARow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_MSA; - } - } -#endif #if defined(HAS_I422TORGBAROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToRGBARow = I422ToRGBARow_Any_LSX; @@ -5468,14 +5268,6 @@ int NV12ToRGB565Matrix(const uint8_t* src_y, } } #endif -#if defined(HAS_NV12TORGB565ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - NV12ToRGB565Row = NV12ToRGB565Row_MSA; - } - } -#endif #if 
defined(HAS_NV12TORGB565ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { NV12ToRGB565Row = NV12ToRGB565Row_Any_LSX; @@ -5581,14 +5373,6 @@ int I420ToRGBAMatrix(const uint8_t* src_y, I422ToRGBARow = I422ToRGBARow_SME; } #endif -#if defined(HAS_I422TORGBAROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGBARow = I422ToRGBARow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_MSA; - } - } -#endif #if defined(HAS_I422TORGBAROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToRGBARow = I422ToRGBARow_Any_LSX; @@ -5721,14 +5505,6 @@ int I420ToRGB24Matrix(const uint8_t* src_y, I422ToRGB24Row = I422ToRGB24Row_SME; } #endif -#if defined(HAS_I422TORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGB24Row = I422ToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - I422ToRGB24Row = I422ToRGB24Row_MSA; - } - } -#endif #if defined(HAS_I422TORGB24ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToRGB24Row = I422ToRGB24Row_Any_LSX; @@ -5933,14 +5709,6 @@ int I422ToRGB24Matrix(const uint8_t* src_y, I422ToRGB24Row = I422ToRGB24Row_SME; } #endif -#if defined(HAS_I422TORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGB24Row = I422ToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - I422ToRGB24Row = I422ToRGB24Row_MSA; - } - } -#endif #if defined(HAS_I422TORGB24ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToRGB24Row = I422ToRGB24Row_Any_LSX; @@ -6070,14 +5838,6 @@ int I420ToARGB1555(const uint8_t* src_y, I422ToARGB1555Row = I422ToARGB1555Row_SME; } #endif -#if defined(HAS_I422TOARGB1555ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToARGB1555Row = I422ToARGB1555Row_MSA; - } - } -#endif #if defined(HAS_I422TOARGB1555ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_LSX; @@ -6169,14 +5929,6 @@ int I420ToARGB4444(const uint8_t* src_y, I422ToARGB4444Row = I422ToARGB4444Row_SME; } #endif -#if defined(HAS_I422TOARGB4444ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - 
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToARGB4444Row = I422ToARGB4444Row_MSA; - } - } -#endif #if defined(HAS_I422TOARGB4444ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_LSX; @@ -6269,14 +6021,6 @@ int I420ToRGB565Matrix(const uint8_t* src_y, I422ToRGB565Row = I422ToRGB565Row_SME; } #endif -#if defined(HAS_I422TORGB565ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGB565Row = I422ToRGB565Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToRGB565Row = I422ToRGB565Row_MSA; - } - } -#endif #if defined(HAS_I422TORGB565ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToRGB565Row = I422ToRGB565Row_Any_LSX; @@ -6419,14 +6163,6 @@ int I422ToRGB565Matrix(const uint8_t* src_y, I422ToRGB565Row = I422ToRGB565Row_SME; } #endif -#if defined(HAS_I422TORGB565ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToRGB565Row = I422ToRGB565Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToRGB565Row = I422ToRGB565Row_MSA; - } - } -#endif #if defined(HAS_I422TORGB565ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToRGB565Row = I422ToRGB565Row_Any_LSX; @@ -6552,14 +6288,6 @@ int I420ToRGB565Dither(const uint8_t* src_y, I422ToARGBRow = I422ToARGBRow_SME; } #endif -#if defined(HAS_I422TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGBRow = I422ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I422TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToARGBRow = I422ToARGBRow_Any_LSX; @@ -6610,14 +6338,6 @@ int I420ToRGB565Dither(const uint8_t* src_y, ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SVE2; } #endif -#if defined(HAS_ARGBTORGB565DITHERROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA; - } - } -#endif #if defined(HAS_ARGBTORGB565DITHERROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToRGB565DitherRow = 
ARGBToRGB565DitherRow_Any_LSX; @@ -6866,14 +6586,6 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, I444ToARGBRow = I444ToARGBRow_SME; } #endif -#if defined(HAS_I444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I444TOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I444ToARGBRow = I444ToARGBRow_Any_LASX; @@ -7023,14 +6735,6 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, I444ToARGBRow = I444ToARGBRow_SME; } #endif -#if defined(HAS_I444TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToARGBRow = I444ToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I444TOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I444ToARGBRow = I444ToARGBRow_Any_LASX; @@ -7157,14 +6861,6 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, I444ToRGB24Row = I444ToRGB24Row_SME; } #endif -#if defined(HAS_I444TORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444ToRGB24Row = I444ToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444ToRGB24Row = I444ToRGB24Row_MSA; - } - } -#endif #if defined(HAS_I444TORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I444ToRGB24Row = I444ToRGB24Row_Any_LASX; @@ -7779,14 +7475,6 @@ static int I420AlphaToARGBMatrixBilinear( I444AlphaToARGBRow = I444AlphaToARGBRow_SME; } #endif -#if defined(HAS_I444ALPHATOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444AlphaToARGBRow = I444AlphaToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444AlphaToARGBRow = I444AlphaToARGBRow_MSA; - } - } -#endif #if defined(HAS_I444ALPHATOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I444AlphaToARGBRow = I444AlphaToARGBRow_Any_LASX; @@ -7824,14 +7512,6 @@ static int I420AlphaToARGBMatrixBilinear( } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 
8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -8015,14 +7695,6 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, I444AlphaToARGBRow = I444AlphaToARGBRow_SME; } #endif -#if defined(HAS_I444ALPHATOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I444AlphaToARGBRow = I444AlphaToARGBRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - I444AlphaToARGBRow = I444AlphaToARGBRow_MSA; - } - } -#endif #if defined(HAS_I444ALPHATOARGBROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I444AlphaToARGBRow = I444AlphaToARGBRow_Any_LASX; @@ -8060,14 +7732,6 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -8240,14 +7904,6 @@ static int I010AlphaToARGBMatrixBilinear( } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; @@ -8442,14 +8098,6 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBAttenuateRow = ARGBAttenuateRow_RVV; diff --git a/source/convert_from.cc b/source/convert_from.cc index f11870217..5cf88fa2d 100644 --- a/source/convert_from.cc 
+++ b/source/convert_from.cc @@ -445,14 +445,6 @@ int I420ToYUY2(const uint8_t* src_y, } } #endif -#if defined(HAS_I422TOYUY2ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToYUY2Row = I422ToYUY2Row_Any_MSA; - if (IS_ALIGNED(width, 32)) { - I422ToYUY2Row = I422ToYUY2Row_MSA; - } - } -#endif #if defined(HAS_I422TOYUY2ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToYUY2Row = I422ToYUY2Row_Any_LSX; @@ -540,14 +532,6 @@ int I422ToUYVY(const uint8_t* src_y, } } #endif -#if defined(HAS_I422TOUYVYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToUYVYRow = I422ToUYVYRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - I422ToUYVYRow = I422ToUYVYRow_MSA; - } - } -#endif #if defined(HAS_I422TOUYVYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LSX; @@ -623,14 +607,6 @@ int I420ToUYVY(const uint8_t* src_y, } } #endif -#if defined(HAS_I422TOUYVYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToUYVYRow = I422ToUYVYRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - I422ToUYVYRow = I422ToUYVYRow_MSA; - } - } -#endif #if defined(HAS_I422TOUYVYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LSX; diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c37558266..e0a3d7c77 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -84,14 +84,6 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOUV444ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToUV444Row = ARGBToUV444Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_MSA; - } - } -#endif #if defined(HAS_ARGBTOUV444ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToUV444Row = ARGBToUV444Row_Any_LSX; @@ -140,14 +132,6 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { 
ARGBToYRow = ARGBToYRow_Any_LSX; @@ -295,18 +279,6 @@ int ARGBToI422(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -459,18 +431,6 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -531,14 +491,6 @@ int ARGBToNV12(const uint8_t* src_argb, MergeUVRow_ = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow_ = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow_ = MergeUVRow_Any_LSX; @@ -686,18 +638,6 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -768,14 +708,6 @@ int ARGBToNV21(const uint8_t* src_argb, MergeUVRow_ = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) 
- if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow_ = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow_ = MergeUVRow_Any_LSX; @@ -922,18 +854,6 @@ int ABGRToNV12(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYROW_MSA) && defined(HAS_ABGRTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ABGRToYRow = ABGRToYRow_Any_MSA; - ABGRToUVRow = ABGRToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ABGRToUVRow = ABGRToUVRow_MSA; - } - } -#endif #if defined(HAS_ABGRTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ABGRToYRow = ABGRToYRow_Any_LSX; @@ -992,14 +912,6 @@ int ABGRToNV12(const uint8_t* src_abgr, MergeUVRow_ = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow_ = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow_ = MergeUVRow_Any_LSX; @@ -1147,18 +1059,6 @@ int ABGRToNV21(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYROW_MSA) && defined(HAS_ABGRTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ABGRToYRow = ABGRToYRow_Any_MSA; - ABGRToUVRow = ABGRToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ABGRToUVRow = ABGRToUVRow_MSA; - } - } -#endif #if defined(HAS_ABGRTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ABGRToYRow = ABGRToYRow_Any_LSX; @@ -1217,14 +1117,6 @@ int ABGRToNV21(const uint8_t* src_abgr, MergeUVRow_ = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow_ = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow_ = MergeUVRow_Any_LSX; @@ -1377,18 
+1269,6 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -1446,14 +1326,6 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif -#if defined(HAS_I422TOYUY2ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToYUY2Row = I422ToYUY2Row_Any_MSA; - if (IS_ALIGNED(width, 32)) { - I422ToYUY2Row = I422ToYUY2Row_MSA; - } - } -#endif #if defined(HAS_I422TOYUY2ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToYUY2Row = I422ToYUY2Row_Any_LSX; @@ -1605,18 +1477,6 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - ARGBToUVRow = ARGBToUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -1674,14 +1534,6 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif -#if defined(HAS_I422TOUYVYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToUYVYRow = I422ToUYVYRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - I422ToUYVYRow = I422ToUYVYRow_MSA; - } - } -#endif #if defined(HAS_I422TOUYVYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LSX; @@ -1777,14 +1629,6 @@ int ARGBToI400(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYROW_LSX) if 
(TestCpuFlag(kCpuHasLSX)) { ARGBToYRow = ARGBToYRow_Any_LSX; @@ -1935,14 +1779,6 @@ int ARGBToRGB24(const uint8_t* src_argb, ARGBToRGB24Row = ARGBToRGB24Row_SVE2; } #endif -#if defined(HAS_ARGBTORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_MSA; - } - } -#endif #if defined(HAS_ARGBTORGB24ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_LSX; @@ -2027,14 +1863,6 @@ int ARGBToRAW(const uint8_t* src_argb, ARGBToRAWRow = ARGBToRAWRow_SVE2; } #endif -#if defined(HAS_ARGBTORAWROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRAWRow = ARGBToRAWRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToRAWRow = ARGBToRAWRow_MSA; - } - } -#endif #if defined(HAS_ARGBTORAWROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToRAWRow = ARGBToRAWRow_Any_LSX; @@ -2123,14 +1951,6 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SVE2; } #endif -#if defined(HAS_ARGBTORGB565DITHERROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA; - } - } -#endif #if defined(HAS_ARGBTORGB565DITHERROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX; @@ -2213,14 +2033,6 @@ int ARGBToRGB565(const uint8_t* src_argb, ARGBToRGB565Row = ARGBToRGB565Row_SVE2; } #endif -#if defined(HAS_ARGBTORGB565ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToRGB565Row = ARGBToRGB565Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBToRGB565Row = ARGBToRGB565Row_MSA; - } - } -#endif #if defined(HAS_ARGBTORGB565ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_LSX; @@ -2296,14 +2108,6 @@ int ARGBToARGB1555(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOARGB1555ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MSA; - 
if (IS_ALIGNED(width, 8)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_MSA; - } - } -#endif #if defined(HAS_ARGBTOARGB1555ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LSX; @@ -2378,14 +2182,6 @@ int ARGBToARGB4444(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOARGB4444ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_MSA; - } - } -#endif #if defined(HAS_ARGBTOARGB4444ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LSX; @@ -2589,14 +2385,6 @@ int ARGBToJ444(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOUVJ444ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_MSA; - } - } -#endif #if defined(HAS_ARGBTOUVJ444ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToUVJ444Row = ARGBToUVJ444Row_Any_LSX; @@ -2645,14 +2433,6 @@ int ARGBToJ444(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYJRow = ARGBToYJRow_Any_LSX; @@ -2793,18 +2573,6 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) && defined(HAS_ARGBTOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; - ARGBToUVJRow = ARGBToUVJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYJRow = ARGBToYJRow_Any_LSX; @@ -2961,18 +2729,6 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) 
&& defined(HAS_ARGBTOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; - ARGBToUVJRow = ARGBToUVJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYJRow = ARGBToYJRow_Any_LSX; @@ -3067,14 +2823,6 @@ int ARGBToJ400(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYJROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { ARGBToYJRow = ARGBToYJRow_RVV; @@ -3146,14 +2894,6 @@ int RGBAToJ400(const uint8_t* src_rgba, } } #endif -#if defined(HAS_RGBATOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RGBAToYJRow = RGBAToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_MSA; - } - } -#endif #if defined(HAS_RGBATOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RGBAToYJRow = RGBAToYJRow_Any_LSX; @@ -3291,16 +3031,6 @@ int ABGRToJ420(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ABGRToYJRow = ABGRToYJRow_Any_MSA; - ABGRToUVJRow = ABGRToUVJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_MSA; - ABGRToUVJRow = ABGRToUVJRow_MSA; - } - } -#endif #if defined(HAS_ABGRTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ABGRToYJRow = ABGRToYJRow_Any_LSX; @@ -3453,18 +3183,6 @@ int ABGRToJ422(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ABGRToYJRow = ABGRToYJRow_Any_MSA; - ABGRToUVJRow = ABGRToUVJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_MSA; - } - if (IS_ALIGNED(width, 32)) { - ABGRToUVJRow = ABGRToUVJRow_MSA; - } - } -#endif 
#if defined(HAS_ABGRTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ABGRToYJRow = ABGRToYJRow_Any_LSX; @@ -3555,14 +3273,6 @@ int ABGRToJ400(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ABGRToYJRow = ABGRToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYJRow = ABGRToYJRow_MSA; - } - } -#endif #if defined(HAS_ABGRTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ABGRToYJRow = ABGRToYJRow_Any_LSX; @@ -3722,8 +3432,7 @@ int ARGBToAB64(const uint8_t* src_argb, } // Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_RVV) +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_RVV) #define HAS_RAWTOYJROW #endif @@ -3779,16 +3488,6 @@ int RAWToJNV21(const uint8_t* src_raw, } } #endif -#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToUVJRow = RAWToUVJRow_Any_MSA; - RAWToYJRow = RAWToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_MSA; - RAWToUVJRow = RAWToUVJRow_MSA; - } - } -#endif #if defined(HAS_RAWTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToYJRow = RAWToYJRow_Any_LSX; @@ -3900,14 +3599,6 @@ int RAWToJNV21(const uint8_t* src_raw, MergeUVRow_ = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow_ = MergeUVRow_Any_MSA; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow_ = MergeUVRow_Any_LSX; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 95a658661..dd5813a6c 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -359,42 +359,13 @@ LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) { return flag; } -LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) { - char cpuinfo_line[512]; - int flag = 0; - FILE* f = fopen(cpuinfo_name, "re"); - if (!f) { - // Assume nothing if 
/proc/cpuinfo is unavailable. - // This will occur for Chrome sandbox for Pepper or Render process. - return 0; - } - memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); - while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { - if (memcmp(cpuinfo_line, "cpu model", 9) == 0) { - // Workaround early kernel without MSA in ASEs line. - if (strstr(cpuinfo_line, "Loongson-2K")) { - flag |= kCpuHasMSA; - } - } - if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) { - if (strstr(cpuinfo_line, "msa")) { - flag |= kCpuHasMSA; - } - // ASEs is the last line, so we can break here. - break; - } - } - fclose(f); - return flag; -} - #if defined(__loongarch__) && defined(__linux__) // Define hwcap values ourselves: building with an old auxv header where these // hwcap values are not defined should not prevent features from being enabled. #define YUV_LOONGARCH_HWCAP_LSX (1 << 4) #define YUV_LOONGARCH_HWCAP_LASX (1 << 5) -LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) { +LIBYUV_API SAFEBUFFERS int LoongArchCpuCaps(void) { int flag = 0; unsigned long hwcap = getauxval(AT_HWCAP); @@ -462,12 +433,8 @@ static SAFEBUFFERS int GetCpuFlags(void) { } } #endif -#if defined(__mips__) && defined(__linux__) - cpu_info = MipsCpuCaps("/proc/cpuinfo"); - cpu_info |= kCpuHasMIPS; -#endif #if defined(__loongarch__) && defined(__linux__) - cpu_info = LoongarchCpuCaps(); + cpu_info = LoongArchCpuCaps(); cpu_info |= kCpuHasLOONGARCH; #endif #if defined(__aarch64__) diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 20c51847b..a57e9a071 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -631,14 +631,6 @@ void SplitUVPlane(const uint8_t* src_uv, } } #endif -#if defined(HAS_SPLITUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SplitUVRow = SplitUVRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_MSA; - } - } -#endif #if defined(HAS_SPLITUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { SplitUVRow = SplitUVRow_Any_LSX; @@ -727,14 +719,6 @@ void 
MergeUVPlane(const uint8_t* src_u, MergeUVRow = MergeUVRow_SME; } #endif -#if defined(HAS_MERGEUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MergeUVRow = MergeUVRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - MergeUVRow = MergeUVRow_MSA; - } - } -#endif #if defined(HAS_MERGEUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MergeUVRow = MergeUVRow_Any_LSX; @@ -2251,16 +2235,6 @@ int YUY2ToI422(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - YUY2ToYRow = YUY2ToYRow_Any_MSA; - YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA; - if (IS_ALIGNED(width, 32)) { - YUY2ToYRow = YUY2ToYRow_MSA; - YUY2ToUV422Row = YUY2ToUV422Row_MSA; - } - } -#endif #if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { YUY2ToYRow = YUY2ToYRow_Any_LSX; @@ -2357,16 +2331,6 @@ int UYVYToI422(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_UYVYTOYROW_MSA) && defined(HAS_UYVYTOUV422ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - UYVYToYRow = UYVYToYRow_Any_MSA; - UYVYToUV422Row = UYVYToUV422Row_Any_MSA; - if (IS_ALIGNED(width, 32)) { - UYVYToYRow = UYVYToYRow_MSA; - UYVYToUV422Row = UYVYToUV422Row_MSA; - } - } -#endif #if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { UYVYToYRow = UYVYToYRow_Any_LSX; @@ -2449,14 +2413,6 @@ int YUY2ToY(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - YUY2ToYRow = YUY2ToYRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - YUY2ToYRow = YUY2ToYRow_MSA; - } - } -#endif for (y = 0; y < height; ++y) { YUY2ToYRow(src_yuy2, dst_y, width); @@ -2516,14 +2472,6 @@ int UYVYToY(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_UYVYTOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - UYVYToYRow = UYVYToYRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - UYVYToYRow = UYVYToYRow_MSA; - } - } -#endif #if defined(HAS_UYVYTOYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { UYVYToYRow = 
UYVYToYRow_Any_LSX; @@ -2582,14 +2530,6 @@ void MirrorPlane(const uint8_t* src_y, } } #endif -#if defined(HAS_MIRRORROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MirrorRow = MirrorRow_Any_MSA; - if (IS_ALIGNED(width, 64)) { - MirrorRow = MirrorRow_MSA; - } - } -#endif #if defined(HAS_MIRRORROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MirrorRow = MirrorRow_Any_LSX; @@ -2656,14 +2596,6 @@ void MirrorUVPlane(const uint8_t* src_uv, } } #endif -#if defined(HAS_MIRRORUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MirrorUVRow = MirrorUVRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - MirrorUVRow = MirrorUVRow_MSA; - } - } -#endif #if defined(HAS_MIRRORUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MirrorUVRow = MirrorUVRow_Any_LSX; @@ -2836,14 +2768,6 @@ int ARGBMirror(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBMIRRORROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBMirrorRow = ARGBMirrorRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBMirrorRow = ARGBMirrorRow_MSA; - } - } -#endif #if defined(HAS_ARGBMIRRORROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBMirrorRow = ARGBMirrorRow_Any_LSX; @@ -2955,11 +2879,6 @@ int ARGBBlend(const uint8_t* src_argb0, ARGBBlendRow = ARGBBlendRow_NEON; } #endif -#if defined(HAS_ARGBBLENDROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBBlendRow = ARGBBlendRow_MSA; - } -#endif #if defined(HAS_ARGBBLENDROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBBlendRow = ARGBBlendRow_LSX; @@ -3241,14 +3160,6 @@ int ARGBMultiply(const uint8_t* src_argb0, ARGBMultiplyRow = ARGBMultiplyRow_SME; } #endif -#if defined(HAS_ARGBMULTIPLYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA; - if (IS_ALIGNED(width, 4)) { - ARGBMultiplyRow = ARGBMultiplyRow_MSA; - } - } -#endif #if defined(HAS_ARGBMULTIPLYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX; @@ -3334,14 +3245,6 @@ int ARGBAdd(const uint8_t* src_argb0, } } #endif -#if defined(HAS_ARGBADDROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAddRow = 
ARGBAddRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAddRow = ARGBAddRow_MSA; - } - } -#endif #if defined(HAS_ARGBADDROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBAddRow = ARGBAddRow_Any_LSX; @@ -3422,14 +3325,6 @@ int ARGBSubtract(const uint8_t* src_argb0, } } #endif -#if defined(HAS_ARGBSUBTRACTROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBSubtractRow = ARGBSubtractRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBSubtractRow = ARGBSubtractRow_MSA; - } - } -#endif #if defined(HAS_ARGBSUBTRACTROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBSubtractRow = ARGBSubtractRow_Any_LSX; @@ -3504,14 +3399,6 @@ int RAWToRGB24(const uint8_t* src_raw, RAWToRGB24Row = RAWToRGB24Row_SVE2; } #endif -#if defined(HAS_RAWTORGB24ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - RAWToRGB24Row = RAWToRGB24Row_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RAWToRGB24Row = RAWToRGB24Row_MSA; - } - } -#endif #if defined(HAS_RAWTORGB24ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { RAWToRGB24Row = RAWToRGB24Row_Any_LSX; @@ -3579,11 +3466,6 @@ void SetPlane(uint8_t* dst_y, SetRow = SetRow_ERMS; } #endif -#if defined(HAS_SETROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) { - SetRow = SetRow_MSA; - } -#endif #if defined(HAS_SETROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { SetRow = SetRow_Any_LSX; @@ -3674,14 +3556,6 @@ int ARGBRect(uint8_t* dst_argb, ARGBSetRow = ARGBSetRow_X86; } #endif -#if defined(HAS_ARGBSETROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBSetRow = ARGBSetRow_Any_MSA; - if (IS_ALIGNED(width, 4)) { - ARGBSetRow = ARGBSetRow_MSA; - } - } -#endif #if defined(HAS_ARGBSETROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBSetRow = ARGBSetRow_Any_LSX; @@ -3760,14 +3634,6 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBATTENUATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_MSA; - } - } -#endif #if defined(HAS_ARGBATTENUATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) 
{ ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX; @@ -3889,11 +3755,6 @@ int ARGBGrayTo(const uint8_t* src_argb, ARGBGrayRow = ARGBGrayRow_NEON_DotProd; } #endif -#if defined(HAS_ARGBGRAYROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { - ARGBGrayRow = ARGBGrayRow_MSA; - } -#endif #if defined(HAS_ARGBGRAYROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_LSX; @@ -3949,11 +3810,6 @@ int ARGBGray(uint8_t* dst_argb, ARGBGrayRow = ARGBGrayRow_NEON_DotProd; } #endif -#if defined(HAS_ARGBGRAYROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { - ARGBGrayRow = ARGBGrayRow_MSA; - } -#endif #if defined(HAS_ARGBGRAYROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_LSX; @@ -4007,11 +3863,6 @@ int ARGBSepia(uint8_t* dst_argb, ARGBSepiaRow = ARGBSepiaRow_NEON_DotProd; } #endif -#if defined(HAS_ARGBSEPIAROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { - ARGBSepiaRow = ARGBSepiaRow_MSA; - } -#endif #if defined(HAS_ARGBSEPIAROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { ARGBSepiaRow = ARGBSepiaRow_LSX; @@ -4073,11 +3924,6 @@ int ARGBColorMatrix(const uint8_t* src_argb, ARGBColorMatrixRow = ARGBColorMatrixRow_NEON_I8MM; } #endif -#if defined(HAS_ARGBCOLORMATRIXROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { - ARGBColorMatrixRow = ARGBColorMatrixRow_MSA; - } -#endif #if defined(HAS_ARGBCOLORMATRIXROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { ARGBColorMatrixRow = ARGBColorMatrixRow_LSX; @@ -4243,11 +4089,6 @@ int ARGBQuantize(uint8_t* dst_argb, ARGBQuantizeRow = ARGBQuantizeRow_NEON; } #endif -#if defined(HAS_ARGBQUANTIZEROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { - ARGBQuantizeRow = ARGBQuantizeRow_MSA; - } -#endif #if defined(HAS_ARGBQUANTIZEROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { ARGBQuantizeRow = ARGBQuantizeRow_LSX; @@ -4442,11 +4283,6 @@ int ARGBShade(const 
uint8_t* src_argb, ARGBShadeRow = ARGBShadeRow_NEON; } #endif -#if defined(HAS_ARGBSHADEROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) { - ARGBShadeRow = ARGBShadeRow_MSA; - } -#endif #if defined(HAS_ARGBSHADEROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) { ARGBShadeRow = ARGBShadeRow_LSX; @@ -4525,14 +4361,6 @@ int InterpolatePlane(const uint8_t* src0, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -4615,14 +4443,6 @@ int InterpolatePlane_16(const uint16_t* src0, InterpolateRow_16 = InterpolateRow_16_SME; } #endif -#if defined(HAS_INTERPOLATEROW_16_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow_16 = InterpolateRow_16_Any_MSA; - if (IS_ALIGNED(width, 32)) { - InterpolateRow_16 = InterpolateRow_16_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_16_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow_16 = InterpolateRow_16_Any_LSX; @@ -4748,14 +4568,6 @@ int ARGBShuffle(const uint8_t* src_bgra, } } #endif -#if defined(HAS_ARGBSHUFFLEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBShuffleRow = ARGBShuffleRow_Any_MSA; - if (IS_ALIGNED(width, 8)) { - ARGBShuffleRow = ARGBShuffleRow_MSA; - } - } -#endif #if defined(HAS_ARGBSHUFFLEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBShuffleRow = ARGBShuffleRow_Any_LSX; @@ -4973,14 +4785,6 @@ static int ARGBSobelize(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; - } - } -#endif #if defined(HAS_ARGBTOYJROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBToYJRow = ARGBToYJRow_Any_LSX; @@ -5013,11 +4817,6 @@ static int ARGBSobelize(const 
uint8_t* src_argb, SobelYRow = SobelYRow_NEON; } #endif -#if defined(HAS_SOBELYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SobelYRow = SobelYRow_MSA; - } -#endif #if defined(HAS_SOBELXROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXRow = SobelXRow_SSE2; @@ -5027,11 +4826,6 @@ static int ARGBSobelize(const uint8_t* src_argb, if (TestCpuFlag(kCpuHasNEON)) { SobelXRow = SobelXRow_NEON; } -#endif -#if defined(HAS_SOBELXROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SobelXRow = SobelXRow_MSA; - } #endif { // 3 rows with edges before/after. @@ -5109,14 +4903,6 @@ int ARGBSobel(const uint8_t* src_argb, } } #endif -#if defined(HAS_SOBELROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SobelRow = SobelRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - SobelRow = SobelRow_MSA; - } - } -#endif #if defined(HAS_SOBELROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { SobelRow = SobelRow_Any_LSX; @@ -5155,14 +4941,6 @@ int ARGBSobelToPlane(const uint8_t* src_argb, } } #endif -#if defined(HAS_SOBELTOPLANEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SobelToPlaneRow = SobelToPlaneRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - SobelToPlaneRow = SobelToPlaneRow_MSA; - } - } -#endif #if defined(HAS_SOBELTOPLANEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { SobelToPlaneRow = SobelToPlaneRow_Any_LSX; @@ -5202,14 +4980,6 @@ int ARGBSobelXY(const uint8_t* src_argb, } } #endif -#if defined(HAS_SOBELXYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SobelXYRow = SobelXYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - SobelXYRow = SobelXYRow_MSA; - } - } -#endif #if defined(HAS_SOBELXYROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { SobelXYRow = SobelXYRow_Any_LSX; @@ -5345,14 +5115,6 @@ int HalfFloatPlane(const uint16_t* src_y, HalfFloatRow = scale == 1.0f ? 
HalfFloat1Row_SVE2 : HalfFloatRow_SVE2; } #endif -#if defined(HAS_HALFFLOATROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - HalfFloatRow = HalfFloatRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - HalfFloatRow = HalfFloatRow_MSA; - } - } -#endif #if defined(HAS_HALFFLOATROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { HalfFloatRow = HalfFloatRow_Any_LSX; @@ -5527,12 +5289,6 @@ int ARGBExtractAlpha(const uint8_t* src_argb, : ARGBExtractAlphaRow_Any_NEON; } #endif -#if defined(HAS_ARGBEXTRACTALPHAROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA - : ARGBExtractAlphaRow_Any_MSA; - } -#endif #if defined(HAS_ARGBEXTRACTALPHAROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX @@ -5657,14 +5413,6 @@ int YUY2ToNV12(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - YUY2ToYRow = YUY2ToYRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - YUY2ToYRow = YUY2ToYRow_MSA; - } - } -#endif #if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { YUY2ToYRow = YUY2ToYRow_Any_LSX; @@ -5773,14 +5521,6 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_SPLITUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - SplitUVRow = SplitUVRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_MSA; - } - } -#endif #if defined(HAS_SPLITUVROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { SplitUVRow = SplitUVRow_Any_LSX; @@ -5824,14 +5564,6 @@ int UYVYToNV12(const uint8_t* src_uyvy, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; diff --git a/source/rotate.cc 
b/source/rotate.cc index 1ef2e38c1..d4a9fcd27 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -35,8 +35,7 @@ void TransposePlane(const uint8_t* src, void (*TransposeWxH)(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) = NULL; #endif -#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX) || \ - defined(HAS_TRANSPOSEWX16_NEON) +#if defined(HAS_TRANSPOSEWX16_LSX) || defined(HAS_TRANSPOSEWX16_NEON) void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) = TransposeWx16_C; #else @@ -81,14 +80,6 @@ void TransposePlane(const uint8_t* src, } } #endif -#if defined(HAS_TRANSPOSEWX16_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - TransposeWx16 = TransposeWx16_Any_MSA; - if (IS_ALIGNED(width, 16)) { - TransposeWx16 = TransposeWx16_MSA; - } - } -#endif #if defined(HAS_TRANSPOSEWX16_LSX) if (TestCpuFlag(kCpuHasLSX)) { TransposeWx16 = TransposeWx16_Any_LSX; @@ -104,8 +95,7 @@ void TransposePlane(const uint8_t* src, return; } #endif -#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX) || \ - defined(HAS_TRANSPOSEWX16_NEON) +#if defined(HAS_TRANSPOSEWX16_LSX) || defined(HAS_TRANSPOSEWX16_NEON) // Work across the source in 16x16 tiles while (i >= 16) { TransposeWx16(src, src_stride, dst, dst_stride, width); @@ -200,14 +190,6 @@ void RotatePlane180(const uint8_t* src, } } #endif -#if defined(HAS_MIRRORROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - MirrorRow = MirrorRow_Any_MSA; - if (IS_ALIGNED(width, 64)) { - MirrorRow = MirrorRow_MSA; - } - } -#endif #if defined(HAS_MIRRORROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MirrorRow = MirrorRow_Any_LSX; @@ -288,11 +270,7 @@ void SplitTransposeUV(const uint8_t* src, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height) = TransposeUVWxH_C; #endif -#if defined(HAS_TRANSPOSEUVWX16_MSA) - void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a, - int dst_stride_a, uint8_t* dst_b, int dst_stride_b, 
- int width) = TransposeUVWx16_C; -#elif defined(HAS_TRANSPOSEUVWX16_LSX) +#if defined(HAS_TRANSPOSEUVWX16_LSX) void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) = TransposeUVWx16_C; @@ -302,21 +280,14 @@ void SplitTransposeUV(const uint8_t* src, int width) = TransposeUVWx8_C; #endif -#if defined(HAS_TRANSPOSEUVWX16_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - TransposeUVWx16 = TransposeUVWx16_Any_MSA; - if (IS_ALIGNED(width, 8)) { - TransposeUVWx16 = TransposeUVWx16_MSA; - } - } -#elif defined(HAS_TRANSPOSEUVWX16_LSX) +#if defined(HAS_TRANSPOSEUVWX16_LSX) if (TestCpuFlag(kCpuHasLSX)) { TransposeUVWx16 = TransposeUVWx16_Any_LSX; if (IS_ALIGNED(width, 8)) { TransposeUVWx16 = TransposeUVWx16_LSX; } } -#else +#endif #if defined(HAS_TRANSPOSEUVWX8_NEON) if (TestCpuFlag(kCpuHasNEON)) { TransposeUVWx8 = TransposeUVWx8_Any_NEON; @@ -338,7 +309,6 @@ void SplitTransposeUV(const uint8_t* src, } } #endif -#endif /* defined(HAS_TRANSPOSEUVWX16_MSA) */ #if defined(HAS_TRANSPOSEUVWXH_SME) if (TestCpuFlag(kCpuHasSME)) { @@ -347,17 +317,7 @@ void SplitTransposeUV(const uint8_t* src, return; } #endif -#if defined(HAS_TRANSPOSEUVWX16_MSA) - // Work through the source in 8x8 tiles. - while (i >= 16) { - TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, - width); - src += 16 * src_stride; // Go down 16 rows. - dst_a += 16; // Move over 8 columns. - dst_b += 16; // Move over 8 columns. - i -= 16; - } -#elif defined(HAS_TRANSPOSEUVWX16_LSX) +#if defined(HAS_TRANSPOSEUVWX16_LSX) // Work through the source in 8x8 tiles. 
while (i >= 16) { TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, @@ -442,11 +402,6 @@ void SplitRotateUV180(const uint8_t* src, MirrorSplitUVRow = MirrorSplitUVRow_SSSE3; } #endif -#if defined(HAS_MIRRORSPLITUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) { - MirrorSplitUVRow = MirrorSplitUVRow_MSA; - } -#endif #if defined(HAS_MIRRORSPLITUVROW_LSX) if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 32)) { MirrorSplitUVRow = MirrorSplitUVRow_LSX; diff --git a/source/rotate_any.cc b/source/rotate_any.cc index ac94253fd..bf62c067b 100644 --- a/source/rotate_any.cc +++ b/source/rotate_any.cc @@ -41,9 +41,6 @@ TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, TransposeWx8_C, 7) #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, TransposeWx8_C, 15) #endif -#ifdef HAS_TRANSPOSEWX16_MSA -TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, TransposeWx16_C, 15) -#endif #ifdef HAS_TRANSPOSEWX16_LSX TANY(TransposeWx16_Any_LSX, TransposeWx16_LSX, TransposeWx16_C, 15) #endif @@ -68,9 +65,6 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) #ifdef HAS_TRANSPOSEUVWX8_SSE2 TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) #endif -#ifdef HAS_TRANSPOSEUVWX16_MSA -TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) -#endif #ifdef HAS_TRANSPOSEUVWX16_LSX TUVANY(TransposeUVWx16_Any_LSX, TransposeUVWx16_LSX, 7) #endif diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 7fda09d43..8c76ca919 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -53,14 +53,6 @@ static int ARGBTranspose(const uint8_t* src_argb, } } #endif -#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA; - if (IS_ALIGNED(height, 4)) { // Width of dest. 
- ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA; - } - } -#endif #if defined(HAS_SCALEARGBROWDOWNEVEN_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_LSX; @@ -155,14 +147,6 @@ static int ARGBRotate180(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBMIRRORROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBMirrorRow = ARGBMirrorRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ARGBMirrorRow = ARGBMirrorRow_MSA; - } - } -#endif #if defined(HAS_ARGBMIRRORROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ARGBMirrorRow = ARGBMirrorRow_Any_LSX; diff --git a/source/rotate_msa.cc b/source/rotate_msa.cc deleted file mode 100644 index d4e62b12e..000000000 --- a/source/rotate_msa.cc +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright 2016 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/rotate_row.h" - -// This module is for GCC MSA -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#include "libyuv/macros_msa.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define ILVRL_B(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ - out0 = (v16u8)__msa_ilvr_b((v16i8)in1, (v16i8)in0); \ - out1 = (v16u8)__msa_ilvl_b((v16i8)in1, (v16i8)in0); \ - out2 = (v16u8)__msa_ilvr_b((v16i8)in3, (v16i8)in2); \ - out3 = (v16u8)__msa_ilvl_b((v16i8)in3, (v16i8)in2); \ - } - -#define ILVRL_H(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ - out0 = (v16u8)__msa_ilvr_h((v8i16)in1, (v8i16)in0); \ - out1 = (v16u8)__msa_ilvl_h((v8i16)in1, (v8i16)in0); \ - out2 = (v16u8)__msa_ilvr_h((v8i16)in3, (v8i16)in2); \ - out3 = (v16u8)__msa_ilvl_h((v8i16)in3, (v8i16)in2); \ - } - -#define ILVRL_W(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ - out0 = (v16u8)__msa_ilvr_w((v4i32)in1, (v4i32)in0); \ - out1 = (v16u8)__msa_ilvl_w((v4i32)in1, (v4i32)in0); \ - out2 = (v16u8)__msa_ilvr_w((v4i32)in3, (v4i32)in2); \ - out3 = (v16u8)__msa_ilvl_w((v4i32)in3, (v4i32)in2); \ - } - -#define ILVRL_D(in0, in1, in2, in3, out0, out1, out2, out3) \ - { \ - out0 = (v16u8)__msa_ilvr_d((v2i64)in1, (v2i64)in0); \ - out1 = (v16u8)__msa_ilvl_d((v2i64)in1, (v2i64)in0); \ - out2 = (v16u8)__msa_ilvr_d((v2i64)in3, (v2i64)in2); \ - out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2); \ - } - -void TransposeUVWx16_C(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width) { - TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, - width); - TransposeUVWx8_C((src + 8 * src_stride), src_stride, (dst_a + 8), - dst_stride_a, (dst_b + 8), dst_stride_b, width); -} - -void TransposeWx16_MSA(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width) { - int x; - const uint8_t* s; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, 
vec2, vec3; - v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; - v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; - - for (x = 0; x < width; x += 16) { - s = src; - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); - ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3); - ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7); - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); - res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0); - ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3); - ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); - dst += dst_stride * 4; - 
res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1); - ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3); - ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); - dst += dst_stride * 4; - res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2); - ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3); - ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); - dst += dst_stride * 4; - res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3); - ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3); - ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); - src += 16; - dst += dst_stride * 4; - } -} - -void TransposeUVWx16_MSA(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width) { - int x; - const uint8_t* s; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; - v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; - v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; - - for (x = 0; x < width; x += 8) { - s = src; - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); - ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3); - ILVRL_W(reg2, 
reg6, reg3, reg7, res4, res5, res6, res7); - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); - s += src_stride; - ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); - ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); - res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0); - ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3); - ST_UB2(dst0, dst2, dst_a, dst_stride_a); - ST_UB2(dst1, dst3, dst_b, dst_stride_b); - dst_a += dst_stride_a * 2; - dst_b += dst_stride_b * 2; - res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1); - ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3); - ST_UB2(dst0, dst2, dst_a, dst_stride_a); - ST_UB2(dst1, dst3, dst_b, dst_stride_b); - dst_a += dst_stride_a * 2; - dst_b += dst_stride_b * 2; - res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2); - ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3); - ST_UB2(dst0, dst2, dst_a, dst_stride_a); - ST_UB2(dst1, dst3, dst_b, dst_stride_b); - dst_a += dst_stride_a * 2; - dst_b += dst_stride_b * 2; - res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3); - res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3); - ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3); - ST_UB2(dst0, dst2, dst_a, dst_stride_a); - ST_UB2(dst1, dst3, dst_b, dst_stride_b); - src += 16; - dst_a += 
dst_stride_a * 2; - dst_b += dst_stride_b * 2; - } -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/source/row_any.cc b/source/row_any.cc index edfaa7f5f..cd0d3e76d 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -110,12 +110,6 @@ ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7) #ifdef HAS_I422ALPHATOARGBROW_NEON ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7) #endif -#ifdef HAS_I444ALPHATOARGBROW_MSA -ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7) -#endif -#ifdef HAS_I422ALPHATOARGBROW_MSA -ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7) -#endif #ifdef HAS_I422ALPHATOARGBROW_LSX ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15) #endif @@ -311,9 +305,6 @@ ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31) #ifdef HAS_I422TOYUY2ROW_NEON ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #endif -#ifdef HAS_I422TOYUY2ROW_MSA -ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31) -#endif #ifdef HAS_I422TOYUY2ROW_LSX ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15) #endif @@ -323,9 +314,6 @@ ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31) #ifdef HAS_I422TOUYVYROW_NEON ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #endif -#ifdef HAS_I422TOUYVYROW_MSA -ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31) -#endif #ifdef HAS_I422TOUYVYROW_LSX ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15) #endif @@ -439,15 +427,6 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) #endif -#ifdef HAS_I422TOARGBROW_MSA -ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7) -ANY31C(I422ToARGBRow_Any_MSA, 
I422ToARGBRow_MSA, 1, 0, 4, 7) -ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7) -ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15) -ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7) -ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7) -ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) -#endif #ifdef HAS_I422TOARGBROW_LSX ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15) ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15) @@ -645,9 +624,6 @@ ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31) #ifdef HAS_MERGEUVROW_NEON ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) #endif -#ifdef HAS_MERGEUVROW_MSA -ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15) -#endif #ifdef HAS_MERGEUVROW_LSX ANY21(MergeUVRow_Any_LSX, MergeUVRow_LSX, 0, 1, 1, 2, 15) #endif @@ -688,27 +664,18 @@ ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7) #ifdef HAS_ARGBSUBTRACTROW_NEON ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7) #endif -#ifdef HAS_ARGBMULTIPLYROW_MSA -ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) -#endif #ifdef HAS_ARGBMULTIPLYROW_LSX ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3) #endif #ifdef HAS_ARGBMULTIPLYROW_LASX ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7) #endif -#ifdef HAS_ARGBADDROW_MSA -ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) -#endif #ifdef HAS_ARGBADDROW_LSX ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3) #endif #ifdef HAS_ARGBADDROW_LASX ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7) #endif -#ifdef HAS_ARGBSUBTRACTROW_MSA -ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) -#endif #ifdef HAS_ARGBSUBTRACTROW_LSX ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3) #endif @@ -721,9 +688,6 @@ ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) #ifdef HAS_SOBELROW_NEON 
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7) #endif -#ifdef HAS_SOBELROW_MSA -ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15) -#endif #ifdef HAS_SOBELROW_LSX ANY21(SobelRow_Any_LSX, SobelRow_LSX, 0, 1, 1, 4, 15) #endif @@ -733,9 +697,6 @@ ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15) #ifdef HAS_SOBELTOPLANEROW_NEON ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) #endif -#ifdef HAS_SOBELTOPLANEROW_MSA -ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31) -#endif #ifdef HAS_SOBELTOPLANEROW_LSX ANY21(SobelToPlaneRow_Any_LSX, SobelToPlaneRow_LSX, 0, 1, 1, 1, 31) #endif @@ -745,9 +706,6 @@ ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) #ifdef HAS_SOBELXYROW_NEON ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) #endif -#ifdef HAS_SOBELXYROW_MSA -ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15) -#endif #ifdef HAS_SOBELXYROW_LSX ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15) #endif @@ -814,9 +772,6 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) #ifdef HAS_NV12TOARGBROW_NEON ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif -#ifdef HAS_NV12TOARGBROW_MSA -ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7) -#endif #ifdef HAS_NV12TOARGBROW_LSX ANY21C(NV12ToARGBRow_Any_LSX, NV12ToARGBRow_LSX, 1, 1, 2, 4, 7) #endif @@ -832,9 +787,6 @@ ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) #ifdef HAS_NV21TOARGBROW_NEON ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif -#ifdef HAS_NV21TOARGBROW_MSA -ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7) -#endif #ifdef HAS_NV21TOARGBROW_LSX ANY21C(NV21ToARGBRow_Any_LSX, NV21ToARGBRow_LSX, 1, 1, 2, 4, 7) #endif @@ -868,9 +820,6 @@ ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) #ifdef HAS_NV12TORGB565ROW_NEON ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) #endif 
-#ifdef HAS_NV12TORGB565ROW_MSA -ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7) -#endif #ifdef HAS_NV12TORGB565ROW_LSX ANY21C(NV12ToRGB565Row_Any_LSX, NV12ToRGB565Row_LSX, 1, 1, 2, 2, 7) #endif @@ -1074,14 +1023,6 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) #endif -#if defined(HAS_ARGBTORGB24ROW_MSA) -ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15) -ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15) -ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7) -ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7) -ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7) -ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) -#endif #if defined(HAS_ARGBTORGB24ROW_LSX) ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15) ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15) @@ -1102,9 +1043,6 @@ ANY11(J400ToARGBRow_Any_LSX, J400ToARGBRow_LSX, 0, 1, 4, 15) #if defined(HAS_RAWTORGB24ROW_NEON) ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) #endif -#if defined(HAS_RAWTORGB24ROW_MSA) -ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15) -#endif #if defined(HAS_RAWTORGB24ROW_LSX) ANY11(RAWToRGB24Row_Any_LSX, RAWToRGB24Row_LSX, 0, 3, 3, 15) #endif @@ -1156,9 +1094,6 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15) #ifdef HAS_ARGBTOYROW_NEON_DOTPROD ANY11(ARGBToYRow_Any_NEON_DotProd, ARGBToYRow_NEON_DotProd, 0, 4, 1, 15) #endif -#ifdef HAS_ARGBTOYROW_MSA -ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) -#endif #ifdef HAS_ARGBTOYROW_LSX ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15) #endif @@ -1183,9 +1118,6 @@ ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15) #ifdef HAS_RGBATOYJROW_NEON_DOTPROD ANY11(RGBAToYJRow_Any_NEON_DotProd, RGBAToYJRow_NEON_DotProd, 0, 4, 1, 
15) #endif -#ifdef HAS_ARGBTOYJROW_MSA -ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) -#endif #ifdef HAS_ARGBTOYJROW_LSX ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15) #endif @@ -1210,9 +1142,6 @@ ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15) #ifdef HAS_BGRATOYROW_NEON_DOTPROD ANY11(BGRAToYRow_Any_NEON_DotProd, BGRAToYRow_NEON_DotProd, 0, 4, 1, 15) #endif -#ifdef HAS_BGRATOYROW_MSA -ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15) -#endif #ifdef HAS_BGRATOYROW_LSX ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15) #endif @@ -1225,9 +1154,6 @@ ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15) #ifdef HAS_ABGRTOYROW_NEON_DOTPROD ANY11(ABGRToYRow_Any_NEON_DotProd, ABGRToYRow_NEON_DotProd, 0, 4, 1, 15) #endif -#ifdef HAS_ABGRTOYROW_MSA -ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) -#endif #ifdef HAS_ABGRTOYROW_LSX ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15) #endif @@ -1240,9 +1166,6 @@ ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15) #ifdef HAS_RGBATOYROW_NEON_DOTPROD ANY11(RGBAToYRow_Any_NEON_DotProd, RGBAToYRow_NEON_DotProd, 0, 4, 1, 15) #endif -#ifdef HAS_RGBATOYROW_MSA -ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) -#endif #ifdef HAS_RGBATOYROW_LSX ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15) #endif @@ -1261,9 +1184,6 @@ ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15) #ifdef HAS_RGB24TOYJROW_NEON ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 15) #endif -#ifdef HAS_RGB24TOYROW_MSA -ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) -#endif #ifdef HAS_RGB24TOYROW_LSX ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15) #endif @@ -1288,9 +1208,6 @@ ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15) #ifdef HAS_RAWTOYJROW_NEON ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 15) #endif -#ifdef HAS_RAWTOYROW_MSA -ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) -#endif #ifdef HAS_RAWTOYROW_LSX ANY11(RAWToYRow_Any_LSX, 
RAWToYRow_LSX, 0, 3, 1, 15) #endif @@ -1306,9 +1223,6 @@ ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31) #ifdef HAS_RGB565TOYROW_NEON ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 15) #endif -#ifdef HAS_RGB565TOYROW_MSA -ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15) -#endif #ifdef HAS_RGB565TOYROW_LSX ANY11(RGB565ToYRow_Any_LSX, RGB565ToYRow_LSX, 0, 2, 1, 15) #endif @@ -1322,9 +1236,6 @@ ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 15) ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7) #endif #endif -#ifdef HAS_ARGB1555TOYROW_MSA -ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15) -#endif #ifdef HAS_ARGB1555TOYROW_LSX ANY11(ARGB1555ToYRow_Any_LSX, ARGB1555ToYRow_LSX, 0, 2, 1, 15) #endif @@ -1344,18 +1255,12 @@ ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) #ifdef HAS_UYVYTOYROW_NEON ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) #endif -#ifdef HAS_YUY2TOYROW_MSA -ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) -#endif #ifdef HAS_YUY2TOYROW_LSX ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOYROW_LASX ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31) #endif -#ifdef HAS_UYVYTOYROW_MSA -ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) -#endif #ifdef HAS_UYVYTOYROW_LSX ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15) #endif @@ -1377,9 +1282,6 @@ ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15) #ifdef HAS_RGB24TOARGBROW_NEON ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) #endif -#ifdef HAS_RGB24TOARGBROW_MSA -ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15) -#endif #ifdef HAS_RGB24TOARGBROW_LSX ANY11(RGB24ToARGBRow_Any_LSX, RGB24ToARGBRow_LSX, 0, 3, 4, 15) #endif @@ -1392,9 +1294,6 @@ ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) #ifdef HAS_RAWTORGBAROW_NEON ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7) #endif -#ifdef HAS_RAWTOARGBROW_MSA 
-ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) -#endif #ifdef HAS_RAWTOARGBROW_LSX ANY11(RAWToARGBRow_Any_LSX, RAWToARGBRow_LSX, 0, 3, 4, 15) #endif @@ -1404,9 +1303,6 @@ ANY11(RAWToARGBRow_Any_LASX, RAWToARGBRow_LASX, 0, 3, 4, 31) #ifdef HAS_RGB565TOARGBROW_NEON ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 15) #endif -#ifdef HAS_RGB565TOARGBROW_MSA -ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15) -#endif #ifdef HAS_RGB565TOARGBROW_LSX ANY11(RGB565ToARGBRow_Any_LSX, RGB565ToARGBRow_LSX, 0, 2, 4, 15) #endif @@ -1416,9 +1312,6 @@ ANY11(RGB565ToARGBRow_Any_LASX, RGB565ToARGBRow_LASX, 0, 2, 4, 31) #ifdef HAS_ARGB1555TOARGBROW_NEON ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 15) #endif -#ifdef HAS_ARGB1555TOARGBROW_MSA -ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) -#endif #ifdef HAS_ARGB1555TOARGBROW_LSX ANY11(ARGB1555ToARGBRow_Any_LSX, ARGB1555ToARGBRow_LSX, 0, 2, 4, 15) #endif @@ -1428,9 +1321,6 @@ ANY11(ARGB1555ToARGBRow_Any_LASX, ARGB1555ToARGBRow_LASX, 0, 2, 4, 31) #ifdef HAS_ARGB4444TOARGBROW_NEON ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) #endif -#ifdef HAS_ARGB4444TOARGBROW_MSA -ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) -#endif #ifdef HAS_ARGB4444TOARGBROW_LSX ANY11(ARGB4444ToARGBRow_Any_LSX, ARGB4444ToARGBRow_LSX, 0, 2, 4, 15) #endif @@ -1452,9 +1342,6 @@ ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) #ifdef HAS_ARGBATTENUATEROW_NEON ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif -#ifdef HAS_ARGBATTENUATEROW_MSA -ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) -#endif #ifdef HAS_ARGBATTENUATEROW_LSX ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7) #endif @@ -1470,9 +1357,6 @@ ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31) #ifdef HAS_ARGBEXTRACTALPHAROW_NEON ANY11(ARGBExtractAlphaRow_Any_NEON, 
ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) #endif -#ifdef HAS_ARGBEXTRACTALPHAROW_MSA -ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) -#endif #ifdef HAS_ARGBEXTRACTALPHAROW_LSX ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15) #endif @@ -1552,14 +1436,6 @@ ANY11P(I400ToARGBRow_Any_NEON, 4, 7) #endif -#if defined(HAS_I400TOARGBROW_MSA) -ANY11P(I400ToARGBRow_Any_MSA, - I400ToARGBRow_MSA, - const struct YuvConstants*, - 1, - 4, - 15) -#endif #if defined(HAS_I400TOARGBROW_LSX) ANY11P(I400ToARGBRow_Any_LSX, I400ToARGBRow_LSX, @@ -1593,14 +1469,6 @@ ANY11P(ARGBToRGB565DitherRow_Any_NEON, 2, 7) #endif -#if defined(HAS_ARGBTORGB565DITHERROW_MSA) -ANY11P(ARGBToRGB565DitherRow_Any_MSA, - ARGBToRGB565DitherRow_MSA, - const uint32_t, - 4, - 2, - 7) -#endif #if defined(HAS_ARGBTORGB565DITHERROW_LSX) ANY11P(ARGBToRGB565DitherRow_Any_LSX, ARGBToRGB565DitherRow_LSX, @@ -1626,9 +1494,6 @@ ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15) #ifdef HAS_ARGBSHUFFLEROW_NEON ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) #endif -#ifdef HAS_ARGBSHUFFLEROW_MSA -ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) -#endif #ifdef HAS_ARGBSHUFFLEROW_LSX ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7) #endif @@ -1885,9 +1750,6 @@ ANY11P16(HalfFloat1Row_Any_F16C, #ifdef HAS_HALFFLOATROW_NEON ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 15) #endif -#ifdef HAS_HALFFLOATROW_MSA -ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31) -#endif #ifdef HAS_BYTETOFLOATROW_NEON ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 3, 7) #endif @@ -1926,10 +1788,6 @@ ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) #endif -#if 
defined(HAS_YUY2TOARGBROW_MSA) -ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7) -ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7) -#endif #if defined(HAS_YUY2TOARGBROW_LSX) ANY11C(YUY2ToARGBRow_Any_LSX, YUY2ToARGBRow_LSX, 1, 4, 4, 7) ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) @@ -1973,9 +1831,6 @@ ANY11I(InterpolateRow_Any_SSSE3, #ifdef HAS_INTERPOLATEROW_NEON ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15) #endif -#ifdef HAS_INTERPOLATEROW_MSA -ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, uint8_t, 1, 1, 31) -#endif #ifdef HAS_INTERPOLATEROW_LSX ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, uint8_t, 1, 1, 31) #endif @@ -2060,9 +1915,6 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) #ifdef HAS_MIRRORROW_NEON ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31) #endif -#ifdef HAS_MIRRORROW_MSA -ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) -#endif #ifdef HAS_MIRRORROW_LSX ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31) #endif @@ -2078,9 +1930,6 @@ ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7) #ifdef HAS_MIRRORUVROW_NEON ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31) #endif -#ifdef HAS_MIRRORUVROW_MSA -ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7) -#endif #ifdef HAS_MIRRORUVROW_LSX ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7) #endif @@ -2096,9 +1945,6 @@ ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3) #ifdef HAS_ARGBMIRRORROW_NEON ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7) #endif -#ifdef HAS_ARGBMIRRORROW_MSA -ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) -#endif #ifdef HAS_ARGBMIRRORROW_LSX ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7) #endif @@ -2139,9 +1985,6 @@ ANY1(SetRow_Any_LSX, SetRow_LSX, uint8_t, 1, 15) #ifdef HAS_ARGBSETROW_NEON ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3) #endif -#ifdef HAS_ARGBSETROW_MSA -ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3) 
-#endif #ifdef HAS_ARGBSETROW_LSX ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3) #endif @@ -2175,9 +2018,6 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #ifdef HAS_SPLITUVROW_NEON ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) #endif -#ifdef HAS_SPLITUVROW_MSA -ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) -#endif #ifdef HAS_SPLITUVROW_LSX ANY12(SplitUVRow_Any_LSX, SplitUVRow_LSX, 0, 2, 0, 31) #endif @@ -2211,11 +2051,6 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) ANY12(ARGBToUV444Row_Any_NEON_I8MM, ARGBToUV444Row_NEON_I8MM, 0, 4, 0, 7) ANY12(ARGBToUVJ444Row_Any_NEON_I8MM, ARGBToUVJ444Row_NEON_I8MM, 0, 4, 0, 7) #endif -#ifdef HAS_YUY2TOUV422ROW_MSA -ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15) -ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) -ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) -#endif #ifdef HAS_YUY2TOUV422ROW_LSX ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15) ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15) @@ -2407,9 +2242,6 @@ ANY12S(ARGBToUVRow_Any_SVE2, ARGBToUVRow_SVE2, 0, 4, 1) #ifdef HAS_ARGBTOUVROW_SME ANY12S(ARGBToUVRow_Any_SME, ARGBToUVRow_SME, 0, 4, 1) #endif -#ifdef HAS_ARGBTOUVROW_MSA -ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) -#endif #ifdef HAS_ARGBTOUVROW_LSX ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15) #endif @@ -2440,9 +2272,6 @@ ANY12S(ABGRToUVJRow_Any_SVE2, ABGRToUVJRow_SVE2, 0, 4, 1) #ifdef HAS_ABGRTOUVJROW_SME ANY12S(ABGRToUVJRow_Any_SME, ABGRToUVJRow_SME, 0, 4, 1) #endif -#ifdef HAS_ARGBTOUVJROW_MSA -ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) -#endif #ifdef HAS_ARGBTOUVJROW_LSX ANY12S(ARGBToUVJRow_Any_LSX, ARGBToUVJRow_LSX, 0, 4, 15) #endif @@ -2461,9 +2290,6 @@ ANY12S(BGRAToUVRow_Any_SVE2, BGRAToUVRow_SVE2, 0, 4, 1) #ifdef HAS_BGRATOUVROW_SME ANY12S(BGRAToUVRow_Any_SME, BGRAToUVRow_SME, 0, 4, 1) #endif -#ifdef HAS_BGRATOUVROW_MSA 
-ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15) -#endif #ifdef HAS_BGRATOUVROW_LSX ANY12S(BGRAToUVRow_Any_LSX, BGRAToUVRow_LSX, 0, 4, 15) #endif @@ -2479,9 +2305,6 @@ ANY12S(ABGRToUVRow_Any_SVE2, ABGRToUVRow_SVE2, 0, 4, 1) #ifdef HAS_ABGRTOUVROW_SME ANY12S(ABGRToUVRow_Any_SME, ABGRToUVRow_SME, 0, 4, 1) #endif -#ifdef HAS_ABGRTOUVROW_MSA -ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15) -#endif #ifdef HAS_ABGRTOUVROW_LSX ANY12S(ABGRToUVRow_Any_LSX, ABGRToUVRow_LSX, 0, 4, 15) #endif @@ -2497,9 +2320,6 @@ ANY12S(RGBAToUVRow_Any_SVE2, RGBAToUVRow_SVE2, 0, 4, 1) #ifdef HAS_RGBATOUVROW_SME ANY12S(RGBAToUVRow_Any_SME, RGBAToUVRow_SME, 0, 4, 1) #endif -#ifdef HAS_RGBATOUVROW_MSA -ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15) -#endif #ifdef HAS_RGBATOUVROW_LSX ANY12S(RGBAToUVRow_Any_LSX, RGBAToUVRow_LSX, 0, 4, 15) #endif @@ -2509,9 +2329,6 @@ ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) #ifdef HAS_RGB24TOUVJROW_NEON ANY12S(RGB24ToUVJRow_Any_NEON, RGB24ToUVJRow_NEON, 0, 3, 15) #endif -#ifdef HAS_RGB24TOUVROW_MSA -ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15) -#endif #ifdef HAS_RGB24TOUVROW_LSX ANY12S(RGB24ToUVRow_Any_LSX, RGB24ToUVRow_LSX, 0, 3, 15) #endif @@ -2524,9 +2341,6 @@ ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) #ifdef HAS_RAWTOUVJROW_NEON ANY12S(RAWToUVJRow_Any_NEON, RAWToUVJRow_NEON, 0, 3, 15) #endif -#ifdef HAS_RAWTOUVROW_MSA -ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15) -#endif #ifdef HAS_RAWTOUVROW_LSX ANY12S(RAWToUVRow_Any_LSX, RAWToUVRow_LSX, 0, 3, 15) #endif @@ -2536,9 +2350,6 @@ ANY12S(RAWToUVRow_Any_LASX, RAWToUVRow_LASX, 0, 3, 31) #ifdef HAS_RGB565TOUVROW_NEON ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) #endif -#ifdef HAS_RGB565TOUVROW_MSA -ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15) -#endif #ifdef HAS_RGB565TOUVROW_LSX ANY12S(RGB565ToUVRow_Any_LSX, RGB565ToUVRow_LSX, 0, 2, 15) #endif @@ -2548,9 +2359,6 @@ ANY12S(RGB565ToUVRow_Any_LASX, RGB565ToUVRow_LASX, 
0, 2, 31) #ifdef HAS_ARGB1555TOUVROW_NEON ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) #endif -#ifdef HAS_ARGB1555TOUVROW_MSA -ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15) -#endif #ifdef HAS_ARGB1555TOUVROW_LSX ANY12S(ARGB1555ToUVRow_Any_LSX, ARGB1555ToUVRow_LSX, 0, 2, 15) #endif @@ -2566,18 +2374,12 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) #ifdef HAS_UYVYTOUVROW_NEON ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) #endif -#ifdef HAS_YUY2TOUVROW_MSA -ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) -#endif #ifdef HAS_YUY2TOUVROW_LSX ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15) #endif #ifdef HAS_YUY2TOUVROW_LASX ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31) #endif -#ifdef HAS_UYVYTOUVROW_MSA -ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) -#endif #ifdef HAS_UYVYTOUVROW_LSX ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15) #endif diff --git a/source/row_msa.cc b/source/row_msa.cc deleted file mode 100644 index b7d5bb5ec..000000000 --- a/source/row_msa.cc +++ /dev/null @@ -1,3597 +0,0 @@ -/* - * Copyright 2016 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -#include "libyuv/row.h" - -// This module is for GCC MSA -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#include "libyuv/macros_msa.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define ALPHA_VAL (-1) - -// Fill YUV -> RGB conversion constants into vectors -#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, yb) \ - { \ - ub = __msa_fill_w(yuvconst->kUVToB[0]); \ - vr = __msa_fill_w(yuvconst->kUVToR[1]); \ - ug = __msa_fill_w(yuvconst->kUVToG[0]); \ - vg = __msa_fill_w(yuvconst->kUVToG[1]); \ - yg = __msa_fill_w(yuvconst->kYToRgb[0]); \ - yb = __msa_fill_w(yuvconst->kYBiasToRgb[0]); \ - } - -// Load YUV 422 pixel data -#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ - { \ - uint64_t y_m; \ - uint32_t u_m, v_m; \ - v4i32 zero_m = {0}; \ - y_m = LD(psrc_y); \ - u_m = LW(psrc_u); \ - v_m = LW(psrc_v); \ - out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \ - out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \ - out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \ - } - -// Clip input vector elements between 0 to 255 -#define CLIP_0TO255(in0, in1, in2, in3, in4, in5) \ - { \ - v4i32 max_m = __msa_ldi_w(0xFF); \ - \ - in0 = __msa_maxi_s_w(in0, 0); \ - in1 = __msa_maxi_s_w(in1, 0); \ - in2 = __msa_maxi_s_w(in2, 0); \ - in3 = __msa_maxi_s_w(in3, 0); \ - in4 = __msa_maxi_s_w(in4, 0); \ - in5 = __msa_maxi_s_w(in5, 0); \ - in0 = __msa_min_s_w(max_m, in0); \ - in1 = __msa_min_s_w(max_m, in1); \ - in2 = __msa_min_s_w(max_m, in2); \ - in3 = __msa_min_s_w(max_m, in3); \ - in4 = __msa_min_s_w(max_m, in4); \ - in5 = __msa_min_s_w(max_m, in5); \ - } - -// Convert 8 pixels of YUV 420 to RGB. 
-#define YUVTORGB(in_y, in_uv, ubvr, ugvg, yg, yb, out_b, out_g, out_r) \ - { \ - v8i16 vec0_m, vec1_m; \ - v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \ - v4i32 reg5_m, reg6_m, reg7_m; \ - v16i8 temp_m, zero_m = {0}; \ - \ - vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y); \ - vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv); \ - reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m); \ - reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m); \ - vec1_m = (v8i16)__msa_subv_h(vec1_m, const_0x80); \ - temp_m = (v16i8)__msa_clti_s_h(vec1_m, 0); \ - reg2_m = (v4i32)__msa_ilvr_h((v8i16)temp_m, (v8i16)vec1_m); \ - reg3_m = (v4i32)__msa_ilvl_h((v8i16)temp_m, (v8i16)vec1_m); \ - reg0_m *= yg; \ - reg1_m *= yg; \ - reg2_m *= ubvr; \ - reg3_m *= ubvr; \ - reg0_m = __msa_srai_w(reg0_m, 16); \ - reg1_m = __msa_srai_w(reg1_m, 16); \ - reg0_m += yb; \ - reg1_m += yb; \ - reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg); \ - reg5_m = __msa_ilvev_w(reg2_m, reg2_m); \ - reg6_m = __msa_ilvev_w(reg3_m, reg3_m); \ - reg7_m = __msa_ilvr_w(reg4_m, reg4_m); \ - reg2_m = __msa_ilvod_w(reg2_m, reg2_m); \ - reg3_m = __msa_ilvod_w(reg3_m, reg3_m); \ - reg4_m = __msa_ilvl_w(reg4_m, reg4_m); \ - reg5_m = reg0_m + reg5_m; \ - reg6_m = reg1_m + reg6_m; \ - reg2_m = reg0_m + reg2_m; \ - reg3_m = reg1_m + reg3_m; \ - reg7_m = reg0_m - reg7_m; \ - reg4_m = reg1_m - reg4_m; \ - reg5_m = __msa_srai_w(reg5_m, 6); \ - reg6_m = __msa_srai_w(reg6_m, 6); \ - reg7_m = __msa_srai_w(reg7_m, 6); \ - reg4_m = __msa_srai_w(reg4_m, 6); \ - reg2_m = __msa_srai_w(reg2_m, 6); \ - reg3_m = __msa_srai_w(reg3_m, 6); \ - CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m); \ - out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m); \ - out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m); \ - out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \ - } - -// Pack and Store 8 ARGB values. 
-#define STOREARGB(in0, in1, in2, in3, pdst_argb) \ - { \ - v8i16 vec0_m, vec1_m; \ - v16u8 dst0_m, dst1_m; \ - vec0_m = (v8i16)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ - vec1_m = (v8i16)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \ - dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m); \ - dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m); \ - ST_UB2(dst0_m, dst1_m, pdst_argb, 16); \ - } - -// Takes ARGB input and calculates Y. -#define ARGBTOY(argb0, argb1, argb2, argb3, const0, const1, const2, shift, \ - y_out) \ - { \ - v16u8 vec0_m, vec1_m, vec2_m, vec3_m; \ - v8u16 reg0_m, reg1_m; \ - \ - vec0_m = (v16u8)__msa_pckev_h((v8i16)argb1, (v8i16)argb0); \ - vec1_m = (v16u8)__msa_pckev_h((v8i16)argb3, (v8i16)argb2); \ - vec2_m = (v16u8)__msa_pckod_h((v8i16)argb1, (v8i16)argb0); \ - vec3_m = (v16u8)__msa_pckod_h((v8i16)argb3, (v8i16)argb2); \ - reg0_m = __msa_dotp_u_h(vec0_m, const0); \ - reg1_m = __msa_dotp_u_h(vec1_m, const0); \ - reg0_m = __msa_dpadd_u_h(reg0_m, vec2_m, const1); \ - reg1_m = __msa_dpadd_u_h(reg1_m, vec3_m, const1); \ - reg0_m += const2; \ - reg1_m += const2; \ - reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, shift); \ - reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, shift); \ - y_out = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ - } - -// Loads current and next row of ARGB input and averages it to calculate U and V -#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3, const_0x0101) \ - { \ - v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \ - v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ - v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \ - v8u16 reg8_m, reg9_m; \ - \ - src0_m = (v16u8)__msa_ld_b((void*)s, 0); \ - src1_m = (v16u8)__msa_ld_b((void*)s, 16); \ - src2_m = (v16u8)__msa_ld_b((void*)s, 32); \ - src3_m = (v16u8)__msa_ld_b((void*)s, 48); \ - src4_m = (v16u8)__msa_ld_b((void*)t, 0); \ - src5_m = (v16u8)__msa_ld_b((void*)t, 16); \ - src6_m = (v16u8)__msa_ld_b((void*)t, 32); \ - 
src7_m = (v16u8)__msa_ld_b((void*)t, 48); \ - vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \ - vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \ - vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \ - vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \ - vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \ - vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \ - vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \ - vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \ - reg0_m = __msa_hadd_u_h(vec0_m, vec0_m); \ - reg1_m = __msa_hadd_u_h(vec1_m, vec1_m); \ - reg2_m = __msa_hadd_u_h(vec2_m, vec2_m); \ - reg3_m = __msa_hadd_u_h(vec3_m, vec3_m); \ - reg4_m = __msa_hadd_u_h(vec4_m, vec4_m); \ - reg5_m = __msa_hadd_u_h(vec5_m, vec5_m); \ - reg6_m = __msa_hadd_u_h(vec6_m, vec6_m); \ - reg7_m = __msa_hadd_u_h(vec7_m, vec7_m); \ - reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \ - reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \ - reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \ - reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \ - reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \ - reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \ - reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \ - reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \ - reg8_m += const_0x0101; \ - reg9_m += const_0x0101; \ - reg0_m += const_0x0101; \ - reg1_m += const_0x0101; \ - argb0 = (v8u16)__msa_srai_h((v8i16)reg8_m, 1); \ - argb1 = (v8u16)__msa_srai_h((v8i16)reg9_m, 1); \ - argb2 = (v8u16)__msa_srai_h((v8i16)reg0_m, 1); \ - argb3 = (v8u16)__msa_srai_h((v8i16)reg1_m, 1); \ - } - -#define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \ - shf0, shf1, shf2, shf3, shift, u_out, v_out) \ - { \ - v8u16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ - v4u32 reg0_m, reg1_m, 
reg2_m, reg3_m; \ - \ - vec0_m = (v8u16)__msa_vshf_h(shf0, (v16i8)argb1, (v16i8)argb0); \ - vec1_m = (v8u16)__msa_vshf_h(shf0, (v16i8)argb3, (v16i8)argb2); \ - vec2_m = (v8u16)__msa_vshf_h(shf1, (v16i8)argb1, (v16i8)argb0); \ - vec3_m = (v8u16)__msa_vshf_h(shf1, (v16i8)argb3, (v16i8)argb2); \ - vec4_m = (v8u16)__msa_vshf_h(shf2, (v16i8)argb1, (v16i8)argb0); \ - vec5_m = (v8u16)__msa_vshf_h(shf2, (v16i8)argb3, (v16i8)argb2); \ - vec6_m = (v8u16)__msa_vshf_h(shf3, (v16i8)argb1, (v16i8)argb0); \ - vec7_m = (v8u16)__msa_vshf_h(shf3, (v16i8)argb3, (v16i8)argb2); \ - reg0_m = __msa_dotp_u_w(vec0_m, const0); \ - reg1_m = __msa_dotp_u_w(vec1_m, const0); \ - reg2_m = __msa_dotp_u_w(vec4_m, const0); \ - reg3_m = __msa_dotp_u_w(vec5_m, const0); \ - reg0_m += const1; \ - reg1_m += const1; \ - reg2_m += const1; \ - reg3_m += const1; \ - reg0_m -= (v4u32)__msa_dotp_u_w(vec2_m, const2); \ - reg1_m -= (v4u32)__msa_dotp_u_w(vec3_m, const2); \ - reg2_m -= (v4u32)__msa_dotp_u_w(vec6_m, const3); \ - reg3_m -= (v4u32)__msa_dotp_u_w(vec7_m, const3); \ - reg0_m = __msa_srl_w(reg0_m, shift); \ - reg1_m = __msa_srl_w(reg1_m, shift); \ - reg2_m = __msa_srl_w(reg2_m, shift); \ - reg3_m = __msa_srl_w(reg3_m, shift); \ - u_out = (v8u16)__msa_pckev_h((v8i16)reg1_m, (v8i16)reg0_m); \ - v_out = (v8u16)__msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \ - } - -// Takes ARGB input and calculates U and V. 
-#define ARGBTOUV_H(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \ - shf0, shf1, shf2, shf3, v_out, u_out) \ - { \ - v8u16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ - v4u32 reg0_m, reg1_m, reg2_m, reg3_m; \ - \ - vec0_m = __msa_vshf_h(shf0, (v16i8)argb1, (v16i8)argb0); \ - vec1_m = __msa_vshf_h(shf0, (v16i8)argb3, (v16i8)argb2); \ - vec2_m = __msa_vshf_h(shf1, (v16i8)argb1, (v16i8)argb0); \ - vec3_m = __msa_vshf_h(shf1, (v16i8)argb3, (v16i8)argb2); \ - vec4_m = __msa_vshf_h(shf2, (v16i8)argb1, (v16i8)argb0); \ - vec5_m = __msa_vshf_h(shf2, (v16i8)argb3, (v16i8)argb2); \ - vec6_m = __msa_vshf_h(shf3, (v16i8)argb1, (v16i8)argb0); \ - vec7_m = __msa_vshf_h(shf3, (v16i8)argb3, (v16i8)argb2); \ - reg0_m = __msa_dotp_u_w(vec0_m, const1); \ - reg1_m = __msa_dotp_u_w(vec1_m, const1); \ - reg2_m = __msa_dotp_u_w(vec4_m, const1); \ - reg3_m = __msa_dotp_u_w(vec5_m, const1); \ - reg0_m += (v4u32)const3; \ - reg1_m += (v4u32)const3; \ - reg2_m += (v4u32)const3; \ - reg3_m += (v4u32)const3; \ - reg0_m -= __msa_dotp_u_w(vec2_m, const0); \ - reg1_m -= __msa_dotp_u_w(vec3_m, const0); \ - reg2_m -= __msa_dotp_u_w(vec6_m, const2); \ - reg3_m -= __msa_dotp_u_w(vec7_m, const2); \ - u_out = (v16u8)__msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \ - v_out = (v16u8)__msa_pckev_h((v8i16)reg1_m, (v8i16)reg0_m); \ - u_out = (v16u8)__msa_pckod_b((v16i8)u_out, (v16i8)u_out); \ - v_out = (v16u8)__msa_pckod_b((v16i8)v_out, (v16i8)v_out); \ - } - -// Load I444 pixel data -#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ - { \ - uint64_t y_m, u_m, v_m; \ - v2i64 zero_m = {0}; \ - y_m = LD(psrc_y); \ - u_m = LD(psrc_u); \ - v_m = LD(psrc_v); \ - out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \ - out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \ - out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \ - } - -#define RGBTOUV(_tmpb, _tmpg, _tmpr, _nexb, _nexg, _nexr, _dst0) \ - { \ - v16u8 _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5; 
\ - v8i16 _reg0, _reg1, _reg2, _reg3, _reg4, _reg5; \ - _tmp0 = (v16u8)__msa_ilvev_b(_tmpb, _nexb); \ - _tmp1 = (v16u8)__msa_ilvod_b(_tmpb, _nexb); \ - _tmp2 = (v16u8)__msa_ilvev_b(_tmpg, _nexg); \ - _tmp3 = (v16u8)__msa_ilvod_b(_tmpg, _nexg); \ - _tmp4 = (v16u8)__msa_ilvev_b(_tmpr, _nexr); \ - _tmp5 = (v16u8)__msa_ilvod_b(_tmpr, _nexr); \ - _reg0 = (v8i16)__msa_hadd_u_h(_tmp0, _tmp0); \ - _reg1 = (v8i16)__msa_hadd_u_h(_tmp1, _tmp1); \ - _reg2 = (v8i16)__msa_hadd_u_h(_tmp2, _tmp2); \ - _reg3 = (v8i16)__msa_hadd_u_h(_tmp3, _tmp3); \ - _reg4 = (v8i16)__msa_hadd_u_h(_tmp4, _tmp4); \ - _reg5 = (v8i16)__msa_hadd_u_h(_tmp5, _tmp5); \ - _reg0 = (v8i16)__msa_aver_u_h(_reg0, _reg1); \ - _reg2 = (v8i16)__msa_aver_u_h(_reg2, _reg3); \ - _reg4 = (v8i16)__msa_aver_u_h(_reg4, _reg5); \ - _reg1 = const_8080 + const_112 * _reg0; \ - _reg3 = const_8080 + const_112 * _reg4; \ - _reg1 = (v8i16)__msa_msubv_h(_reg1, const_74, _reg2); \ - _reg3 = (v8i16)__msa_msubv_h(_reg3, const_94, _reg2); \ - _reg1 = (v8i16)__msa_msubv_h(_reg1, const_38, _reg4); \ - _reg3 = (v8i16)__msa_msubv_h(_reg3, const_18, _reg0); \ - _dst0 = (v16u8)__msa_pckod_b(_reg3, _reg1); \ - } - -void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { - int x; - v16u8 src0, src1, src2, src3; - v16u8 dst0, dst1, dst2, dst3; - v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; - src += width - 64; - - for (x = 0; x < width; x += 64) { - LD_UB4(src, 16, src3, src2, src1, src0); - VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); - VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); - ST_UB4(dst0, dst1, dst2, dst3, dst, 16); - dst += 64; - src -= 64; - } -} - -void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width) { - int x; - v8u16 src, dst; - v8u16 shuffler = {7, 6, 5, 4, 3, 2, 1, 0}; - src_uv += (width - 8) << 1; - for (x = 0; x < width; x += 8) { - src = LD_UH(src_uv); - dst = __msa_vshf_h(shuffler, src, src); - ST_UH(dst, dst_uv); - src_uv 
-= 16; - dst_uv += 16; - } -} - -void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { - int x; - v16u8 src0, src1, src2, src3; - v16u8 dst0, dst1, dst2, dst3; - v16i8 shuffler = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; - src += width * 4 - 64; - - for (x = 0; x < width; x += 16) { - LD_UB4(src, 16, src3, src2, src1, src0); - VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); - VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); - ST_UB4(dst0, dst1, dst2, dst3, dst, 16); - dst += 64; - src -= 64; - } -} - -void I422ToYUY2Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_yuy2, - int width) { - int x; - v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; - v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3; - - for (x = 0; x < width; x += 32) { - src_u0 = LD_UB(src_u); - src_v0 = LD_UB(src_v); - LD_UB2(src_y, 16, src_y0, src_y1); - ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); - ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1); - ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3); - ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16); - src_u += 16; - src_v += 16; - src_y += 32; - dst_yuy2 += 64; - } -} - -void I422ToUYVYRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uyvy, - int width) { - int x; - v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; - v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3; - - for (x = 0; x < width; x += 32) { - src_u0 = LD_UB(src_u); - src_v0 = LD_UB(src_v); - LD_UB2(src_y, 16, src_y0, src_y1); - ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); - ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); - ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); - ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); - src_u += 16; - src_v += 16; - src_y += 32; - dst_uyvy += 64; - } -} - -void I422ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, 
- const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - READYUV422(src_y, src_u, src_v, src0, src1, src2); - src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - STOREARGB(vec0, vec1, vec2, alpha, dst_argb); - src_y += 8; - src_u += 4; - src_v += 4; - dst_argb += 32; - } -} - -void I422ToRGBARow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - READYUV422(src_y, src_u, src_v, src0, src1, src2); - src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - STOREARGB(alpha, vec0, vec1, vec2, dst_argb); - src_y += 8; - src_u += 4; - src_v += 4; - dst_argb += 32; - } -} - -void I422AlphaToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - const uint8_t* src_a, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int 
width) { - int x; - int64_t data_a; - v16u8 src0, src1, src2, src3; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v4i32 zero = {0}; - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - data_a = LD(src_a); - READYUV422(src_y, src_u, src_v, src0, src1, src2); - src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3); - STOREARGB(vec0, vec1, vec2, src3, dst_argb); - src_y += 8; - src_u += 4; - src_v += 4; - src_a += 8; - dst_argb += 32; - } -} - -void I422ToRGB24Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int32_t width) { - int x; - int64_t data_u, data_v; - v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; - v8i16 vec0, vec1, vec2, vec3, vec4, vec5; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v16u8 reg0, reg1, reg2, reg3; - v2i64 zero = {0}; - v8i16 const_0x80 = __msa_ldi_h(0x80); - v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10}; - v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10}; - v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10, - 11, 29, 12, 13, 30, 14, 15, 31}; - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0); - data_u = LD(src_u); - data_v = LD(src_v); - src1 = (v16u8)__msa_insert_d(zero, 0, 
data_u); - src2 = (v16u8)__msa_insert_d(zero, 0, data_v); - src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8); - src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 8); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - YUVTORGB(src3, src4, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec3, vec4, vec5); - reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); - reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3); - reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2); - reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11); - dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0); - dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1); - dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2); - ST_UB2(dst0, dst1, dst_argb, 16); - ST_UB(dst2, (dst_argb + 32)); - src_y += 16; - src_u += 8; - src_v += 8; - dst_argb += 48; - } -} - -// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. 
-void I422ToRGB565Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2, dst0; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - READYUV422(src_y, src_u, src_v, src0, src1, src2); - src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - vec0 = __msa_srli_h(vec0, 3); - vec1 = __msa_srli_h(vec1, 2); - vec2 = __msa_srli_h(vec2, 3); - vec2 = __msa_slli_h(vec2, 11); - vec1 = __msa_slli_h(vec1, 5); - vec0 |= vec1; - dst0 = (v16u8)(vec2 | vec0); - ST_UB(dst0, dst_rgb565); - src_y += 8; - src_u += 4; - src_v += 4; - dst_rgb565 += 16; - } -} - -// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G. 
-void I422ToARGB4444Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2, dst0; - v8i16 vec0, vec1, vec2; - v8u16 reg0, reg1, reg2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v8u16 const_0xF000 = (v8u16)__msa_fill_h(0xF000); - v8u16 mask = (v8u16)__msa_fill_h(0x00F0); - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - READYUV422(src_y, src_u, src_v, src0, src1, src2); - src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - reg0 = (v8u16)__msa_srli_h(vec0, 4); - reg2 = (v8u16)__msa_srli_h(vec2, 4); - reg1 = (v8u16)__msa_and_v(vec1, mask); - reg2 = (v8u16)__msa_slli_h(reg2, 8); - reg1 |= const_0xF000; - reg0 |= reg2; - dst0 = (v16u8)(reg1 | reg0); - ST_UB(dst0, dst_argb4444); - src_y += 8; - src_u += 4; - src_v += 4; - dst_argb4444 += 16; - } -} - -void I422ToARGB1555Row_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2, dst0; - v8i16 vec0, vec1, vec2; - v8u16 reg0, reg1, reg2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v8u16 const_0x8000 = (v8u16)__msa_fill_h(0x8000); - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - READYUV422(src_y, src_u, src_v, src0, src1, src2); - src1 = 
(v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - reg0 = (v8u16)__msa_srli_h(vec0, 3); - reg1 = (v8u16)__msa_srli_h(vec1, 3); - reg2 = (v8u16)__msa_srli_h(vec2, 3); - reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 5); - reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 10); - reg1 |= const_0x8000; - reg0 |= reg2; - dst0 = (v16u8)(reg1 | reg0); - ST_UB(dst0, dst_argb1555); - src_y += 8; - src_u += 4; - src_v += 4; - dst_argb1555 += 16; - } -} - -void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 32) { - LD_UB4(src_yuy2, 16, src0, src1, src2, src3); - dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - ST_UB2(dst0, dst1, dst_y, 16); - src_yuy2 += 64; - dst_y += 32; - } -} - -void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, - int src_stride_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2; - int x; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - v16u8 vec0, vec1, dst0, dst1; - - for (x = 0; x < width; x += 32) { - LD_UB4(src_yuy2, 16, src0, src1, src2, src3); - LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7); - src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); - src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); - vec0 = __msa_aver_u_b(src0, src2); - vec1 = __msa_aver_u_b(src1, src3); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - src_yuy2 += 64; - src_yuy2_next += 64; - dst_u += 16; - dst_v += 16; - } -} - -void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - v16u8 src0, src1, 
src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 32) { - LD_UB4(src_yuy2, 16, src0, src1, src2, src3); - src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - src_yuy2 += 64; - dst_u += 16; - dst_v += 16; - } -} - -void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 32) { - LD_UB4(src_uyvy, 16, src0, src1, src2, src3); - dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - ST_UB2(dst0, dst1, dst_y, 16); - src_uyvy += 64; - dst_y += 32; - } -} - -void UYVYToUVRow_MSA(const uint8_t* src_uyvy, - int src_stride_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy; - int x; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - v16u8 vec0, vec1, dst0, dst1; - - for (x = 0; x < width; x += 32) { - LD_UB4(src_uyvy, 16, src0, src1, src2, src3); - LD_UB4(src_uyvy_next, 16, src4, src5, src6, src7); - src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - src2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); - src3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); - vec0 = __msa_aver_u_b(src0, src2); - vec1 = __msa_aver_u_b(src1, src3); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - src_uyvy += 64; - src_uyvy_next += 64; - dst_u += 16; - dst_v += 16; - } -} - -void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 32) { - 
LD_UB4(src_uyvy, 16, src0, src1, src2, src3); - src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - src_uyvy += 64; - dst_u += 16; - dst_v += 16; - } -} - -void ARGBToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; - v8u16 reg0, reg1, reg2, reg3, reg4, reg5; - v16i8 zero = {0}; - v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19); - v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81); - v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42); - v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48); - vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - reg0 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec0); - reg1 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec1); - reg2 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec2); - reg3 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec3); - reg4 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec0); - reg5 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec1); - reg0 *= const_0x19; - reg1 *= const_0x19; - reg2 *= const_0x81; - reg3 *= const_0x81; - reg4 *= const_0x42; - reg5 *= const_0x42; - reg0 += reg2; - reg1 += reg3; - reg0 += reg4; - reg1 += reg5; - reg0 += const_0x1080; - reg1 += const_0x1080; - reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); - reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); - ST_UB(dst0, dst_y); - src_argb += 64; - dst_y += 16; - } -} - 
-void ARGBToUVRow_MSA(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* src_argb_next = src_argb + src_stride_argb; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; - v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; - v16u8 dst0, dst1; - v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x38); - v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x25); - v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x13); - v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x2f); - v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x09); - v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); - v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001); - - for (x = 0; x < width; x += 32) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((v16u8*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((v16u8*)src_argb, 48); - src4 = (v16u8)__msa_ld_b((v16u8*)src_argb, 64); - src5 = (v16u8)__msa_ld_b((v16u8*)src_argb, 80); - src6 = (v16u8)__msa_ld_b((v16u8*)src_argb, 96); - src7 = (v16u8)__msa_ld_b((v16u8*)src_argb, 112); - vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); - vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); - vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); - vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); - vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); - vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); - vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); - vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2); - reg0 = 
__msa_hadd_u_h(vec8, vec8); - reg1 = __msa_hadd_u_h(vec9, vec9); - reg2 = __msa_hadd_u_h(vec4, vec4); - reg3 = __msa_hadd_u_h(vec5, vec5); - reg4 = __msa_hadd_u_h(vec0, vec0); - reg5 = __msa_hadd_u_h(vec1, vec1); - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 16); - src2 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 32); - src3 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 48); - src4 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 64); - src5 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 80); - src6 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 96); - src7 = (v16u8)__msa_ld_b((v16u8*)src_argb_next, 112); - vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); - vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); - vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); - vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); - vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); - vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); - vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); - vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2); - reg0 += __msa_hadd_u_h(vec8, vec8); - reg1 += __msa_hadd_u_h(vec9, vec9); - reg2 += __msa_hadd_u_h(vec4, vec4); - reg3 += __msa_hadd_u_h(vec5, vec5); - reg4 += __msa_hadd_u_h(vec0, vec0); - reg5 += __msa_hadd_u_h(vec1, vec1); - reg0 += const_0x0001; - reg1 += const_0x0001; - reg2 += const_0x0001; - reg3 += const_0x0001; - reg4 += const_0x0001; - reg5 += const_0x0001; - reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 1); - reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 1); - reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 1); - reg3 = 
(v8u16)__msa_srai_h((v8i16)reg3, 1); - reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 1); - reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 1); - reg6 = reg0 * const_0x70; - reg7 = reg1 * const_0x70; - reg8 = reg2 * const_0x4A; - reg9 = reg3 * const_0x4A; - reg6 += const_0x8080; - reg7 += const_0x8080; - reg8 += reg4 * const_0x26; - reg9 += reg5 * const_0x26; - reg0 *= const_0x12; - reg1 *= const_0x12; - reg2 *= const_0x5E; - reg3 *= const_0x5E; - reg4 *= const_0x70; - reg5 *= const_0x70; - reg2 += reg0; - reg3 += reg1; - reg4 += const_0x8080; - reg5 += const_0x8080; - reg6 -= reg8; - reg7 -= reg9; - reg4 -= reg2; - reg5 -= reg3; - reg6 = (v8u16)__msa_srai_h((v8i16)reg6, 8); - reg7 = (v8u16)__msa_srai_h((v8i16)reg7, 8); - reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 8); - reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg7, (v16i8)reg6); - dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - src_argb += 128; - src_argb_next += 128; - dst_u += 16; - dst_v += 16; - } -} - -void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2; - v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20}; - v16i8 shuffler1 = {5, 6, 8, 9, 10, 12, 13, 14, - 16, 17, 18, 20, 21, 22, 24, 25}; - v16i8 shuffler2 = {10, 12, 13, 14, 16, 17, 18, 20, - 21, 22, 24, 25, 26, 28, 29, 30}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); - dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); - ST_UB2(dst0, dst1, dst_rgb, 16); - ST_UB(dst2, (dst_rgb + 32)); - src_argb += 64; - dst_rgb += 48; - } -} - -void 
ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2; - v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22}; - v16i8 shuffler1 = {5, 4, 10, 9, 8, 14, 13, 12, - 18, 17, 16, 22, 21, 20, 26, 25}; - v16i8 shuffler2 = {8, 14, 13, 12, 18, 17, 16, 22, - 21, 20, 26, 25, 24, 30, 29, 28}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); - dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); - ST_UB2(dst0, dst1, dst_rgb, 16); - ST_UB(dst2, (dst_rgb + 32)); - src_argb += 64; - dst_rgb += 48; - } -} - -void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - int x; - v16u8 src0, src1, dst0; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); - vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3); - vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5); - vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3); - vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3); - vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5); - vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); - vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); - vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1); - vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); - vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2); - vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2); - vec0 = __msa_binsli_b(vec0, vec1, 2); - vec1 = __msa_binsli_b(vec2, vec3, 4); - vec4 = __msa_binsli_b(vec4, vec5, 2); - vec5 = __msa_binsli_b(vec6, vec7, 4); 
- vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); - vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4); - dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0); - ST_UB(dst0, dst_rgb); - src_argb += 32; - dst_rgb += 16; - } -} - -void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { - int x; - v16u8 src0, src1, dst0; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); - vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2); - vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3); - vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); - vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); - vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1); - vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3); - vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2); - vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3); - vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); - vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1); - vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1); - vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2); - vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2); - vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3); - vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3); - vec0 = __msa_binsli_b(vec0, vec1, 2); - vec5 = __msa_binsli_b(vec5, vec6, 2); - vec1 = __msa_binsli_b(vec2, vec3, 5); - vec6 = __msa_binsli_b(vec7, vec8, 5); - vec1 = __msa_binsli_b(vec1, vec4, 0); - vec6 = __msa_binsli_b(vec6, vec9, 0); - vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); - vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5); - dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); - ST_UB(dst0, dst_rgb); - src_argb += 32; - dst_rgb += 16; - } -} - -void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { - int x; - v16u8 src0, src1; - v16u8 vec0, vec1; - v16u8 dst0; - 
v16i8 zero = {0}; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4); - vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4); - src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1); - src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1); - vec0 = __msa_binsli_b(vec0, src0, 3); - vec1 = __msa_binsli_b(vec1, src1, 3); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_rgb); - src_argb += 32; - dst_rgb += 16; - } -} - -void ARGBToUV444Row_MSA(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int32_t width) { - int32_t x; - v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8u16 vec8, vec9, vec10, vec11; - v8u16 const_112 = (v8u16)__msa_ldi_h(112); - v8u16 const_74 = (v8u16)__msa_ldi_h(74); - v8u16 const_38 = (v8u16)__msa_ldi_h(38); - v8u16 const_94 = (v8u16)__msa_ldi_h(94); - v8u16 const_18 = (v8u16)__msa_ldi_h(18); - v8u16 const_32896 = (v8u16)__msa_fill_h(32896); - v16i8 zero = {0}; - - for (x = width; x > 0; x -= 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); - src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); - src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0); - vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); - vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); - vec5 
= (v8u16)__msa_ilvl_b(zero, (v16i8)src2); - vec10 = vec0 * const_18; - vec11 = vec1 * const_18; - vec8 = vec2 * const_94; - vec9 = vec3 * const_94; - vec6 = vec4 * const_112; - vec7 = vec5 * const_112; - vec0 *= const_112; - vec1 *= const_112; - vec2 *= const_74; - vec3 *= const_74; - vec4 *= const_38; - vec5 *= const_38; - vec8 += vec10; - vec9 += vec11; - vec6 += const_32896; - vec7 += const_32896; - vec0 += const_32896; - vec1 += const_32896; - vec2 += vec4; - vec3 += vec5; - vec0 -= vec2; - vec1 -= vec3; - vec6 -= vec8; - vec7 -= vec9; - vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); - vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); - vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8); - vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - src_argb += 64; - dst_u += 16; - dst_v += 16; - } -} - -void ARGBMultiplyRow_MSA(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, dst0; - v8u16 vec0, vec1, vec2, vec3; - v4u32 reg0, reg1, reg2, reg3; - v8i16 zero = {0}; - - for (x = 0; x < width; x += 4) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1); - reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); - reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); - reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); - reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); - reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); - reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); - reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); - reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); - reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16); - reg1 = 
(v4u32)__msa_srai_w((v4i32)reg1, 16); - reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16); - reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16); - vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_argb); - src_argb += 16; - src_argb1 += 16; - dst_argb += 16; - } -} - -void ARGBAddRow_MSA(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); - src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); - dst0 = __msa_adds_u_b(src0, src2); - dst1 = __msa_adds_u_b(src1, src3); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - src_argb1 += 32; - dst_argb += 32; - } -} - -void ARGBSubtractRow_MSA(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); - src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); - dst0 = __msa_subs_u_b(src0, src2); - dst1 = __msa_subs_u_b(src1, src3); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - src_argb1 += 32; - dst_argb += 32; - } -} - -void ARGBAttenuateRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, dst0, dst1; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; - v4u32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; - v8i16 zero = {0}; - v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = 
(v16u8)__msa_ld_b((void*)src_argb, 16); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)src1, (v16i8)src1); - vec4 = (v8u16)__msa_fill_h(vec0[3]); - vec5 = (v8u16)__msa_fill_h(vec0[7]); - vec6 = (v8u16)__msa_fill_h(vec1[3]); - vec7 = (v8u16)__msa_fill_h(vec1[7]); - vec4 = (v8u16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); - vec5 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); - vec6 = (v8u16)__msa_fill_h(vec2[3]); - vec7 = (v8u16)__msa_fill_h(vec2[7]); - vec8 = (v8u16)__msa_fill_h(vec3[3]); - vec9 = (v8u16)__msa_fill_h(vec3[7]); - vec6 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); - vec7 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); - reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec4); - reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec4); - reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec5); - reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec5); - reg4 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec6); - reg5 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec6); - reg6 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec7); - reg7 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec7); - reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); - reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); - reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); - reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); - reg4 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); - reg5 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); - reg6 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); - reg7 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); - reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); - reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); - reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); - reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); - reg4 = (v4u32)__msa_srai_w((v4i32)reg4, 24); - reg5 = (v4u32)__msa_srai_w((v4i32)reg5, 24); - reg6 = (v4u32)__msa_srai_w((v4i32)reg6, 24); - reg7 = (v4u32)__msa_srai_w((v4i32)reg7, 24); - vec0 = 
(v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); - vec3 = (v8u16)__msa_pckev_h((v8i16)reg7, (v8i16)reg6); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - dst0 = __msa_bmnz_v(dst0, src0, mask); - dst1 = __msa_bmnz_v(dst1, src1, mask); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - dst_argb += 32; - } -} - -void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, - uint8_t* dst_rgb, - uint32_t dither4, - int width) { - int x; - v16u8 src0, src1, dst0, vec0, vec1; - v8i16 vec_d0; - v8i16 reg0, reg1, reg2; - v16i8 zero = {0}; - v8i16 max = __msa_ldi_h(0xFF); - - vec_d0 = (v8i16)__msa_fill_w(dither4); - vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0); - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - reg0 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec0); - reg1 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec1); - reg2 = (v8i16)__msa_ilvod_b(zero, (v16i8)vec0); - reg0 += vec_d0; - reg1 += vec_d0; - reg2 += vec_d0; - reg0 = __msa_maxi_s_h((v8i16)reg0, 0); - reg1 = __msa_maxi_s_h((v8i16)reg1, 0); - reg2 = __msa_maxi_s_h((v8i16)reg2, 0); - reg0 = __msa_min_s_h((v8i16)max, (v8i16)reg0); - reg1 = __msa_min_s_h((v8i16)max, (v8i16)reg1); - reg2 = __msa_min_s_h((v8i16)max, (v8i16)reg2); - reg0 = __msa_srai_h(reg0, 3); - reg2 = __msa_srai_h(reg2, 3); - reg1 = __msa_srai_h(reg1, 2); - reg2 = __msa_slli_h(reg2, 11); - reg1 = __msa_slli_h(reg1, 5); - reg0 |= reg1; - dst0 = (v16u8)(reg0 | reg2); - ST_UB(dst0, dst_rgb); - src_argb += 32; - dst_rgb += 16; - } -} - -void ARGBShuffleRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width) { - int x; - v16u8 src0, src1, dst0, dst1; - 
v16i8 vec0; - v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; - int32_t val = LW((int32_t*)shuffler); - - vec0 = (v16i8)__msa_fill_w(val); - shuffler_vec += vec0; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); - dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - dst_argb += 32; - } -} - -void ARGBShadeRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - int width, - uint32_t value) { - int x; - v16u8 src0, dst0; - v8u16 vec0, vec1; - v4u32 reg0, reg1, reg2, reg3, rgba_scale; - v8i16 zero = {0}; - - rgba_scale[0] = value; - rgba_scale = (v4u32)__msa_ilvr_b((v16i8)rgba_scale, (v16i8)rgba_scale); - rgba_scale = (v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale); - - for (x = 0; x < width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); - reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); - reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); - reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); - reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); - reg0 *= rgba_scale; - reg1 *= rgba_scale; - reg2 *= rgba_scale; - reg3 *= rgba_scale; - reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); - reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); - reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); - reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); - vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_argb); - src_argb += 16; - dst_argb += 16; - } -} - -void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) { - int x; - v16u8 src0, src1, vec0, vec1, dst0, dst1; - v8u16 reg0; - v16u8 const_0x4D 
= (v16u8)__msa_ldi_h(0x4D); - v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D); - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); - vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); - vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); - reg0 = __msa_dotp_u_h(vec0, const_0x961D); - reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x4D); - reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 8); - vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0); - vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0); - dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - dst_argb += 32; - } -} - -void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width) { - int x; - v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5; - v8u16 reg0, reg1, reg2; - v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411); - v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23); - v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816); - v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D); - v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218); - v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32); - v8u16 const_0xFF = (v8u16)__msa_ldi_h(0xFF); - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16); - vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); - vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); - vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1); - reg0 = (v8u16)__msa_dotp_u_h(vec0, const_0x4411); - reg1 = (v8u16)__msa_dotp_u_h(vec0, const_0x5816); - reg2 = (v8u16)__msa_dotp_u_h(vec0, const_0x6218); - reg0 = (v8u16)__msa_dpadd_u_h(reg0, vec1, const_0x23); - reg1 = (v8u16)__msa_dpadd_u_h(reg1, vec1, const_0x2D); - reg2 = (v8u16)__msa_dpadd_u_h(reg2, vec1, const_0x32); - reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 7); - reg1 = 
(v8u16)__msa_srai_h((v8i16)reg1, 7); - reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 7); - reg1 = (v8u16)__msa_min_u_h((v8u16)reg1, const_0xFF); - reg2 = (v8u16)__msa_min_u_h((v8u16)reg2, const_0xFF); - vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0); - vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1); - vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2); - vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); - vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); - dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4); - dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4); - ST_UB2(dst0, dst1, dst_argb, 16); - dst_argb += 32; - } -} - -void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1; - v8u16 vec0, vec1, vec2, vec3; - v16u8 dst0, dst1, dst2, dst3; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); - src1 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 16); - vec0 = (v8u16)__msa_andi_b(src0, 0x0F); - vec1 = (v8u16)__msa_andi_b(src1, 0x0F); - vec2 = (v8u16)__msa_andi_b(src0, 0xF0); - vec3 = (v8u16)__msa_andi_b(src1, 0xF0); - vec0 |= (v8u16)__msa_slli_b((v16i8)vec0, 4); - vec1 |= (v8u16)__msa_slli_b((v16i8)vec1, 4); - vec2 |= (v8u16)__msa_srli_b((v16i8)vec2, 4); - vec3 |= (v8u16)__msa_srli_b((v16i8)vec3, 4); - dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); - dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); - dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_argb4444 += 32; - dst_argb += 64; - } -} - -void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, - uint8_t* dst_argb, - int width) { - int x; - v8u16 src0, src1; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5; - v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6; - v16u8 dst0, dst1, dst2, dst3; - v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); - - for (x = 0; x < width; x += 16) { - 
src0 = (v8u16)__msa_ld_h((void*)src_argb1555, 0); - src1 = (v8u16)__msa_ld_h((void*)src_argb1555, 16); - vec0 = src0 & const_0x1F; - vec1 = src1 & const_0x1F; - src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); - src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); - vec2 = src0 & const_0x1F; - vec3 = src1 & const_0x1F; - src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); - src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); - vec4 = src0 & const_0x1F; - vec5 = src1 & const_0x1F; - src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); - src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); - reg0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - reg1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - reg2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); - reg3 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - reg4 = (v16u8)__msa_slli_b((v16i8)reg0, 3); - reg5 = (v16u8)__msa_slli_b((v16i8)reg1, 3); - reg6 = (v16u8)__msa_slli_b((v16i8)reg2, 3); - reg4 |= (v16u8)__msa_srai_b((v16i8)reg0, 2); - reg5 |= (v16u8)__msa_srai_b((v16i8)reg1, 2); - reg6 |= (v16u8)__msa_srai_b((v16i8)reg2, 2); - reg3 = -reg3; - reg0 = (v16u8)__msa_ilvr_b((v16i8)reg6, (v16i8)reg4); - reg1 = (v16u8)__msa_ilvl_b((v16i8)reg6, (v16i8)reg4); - reg2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg5); - reg3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg5); - dst0 = (v16u8)__msa_ilvr_b((v16i8)reg2, (v16i8)reg0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)reg2, (v16i8)reg0); - dst2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg1); - dst3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg1); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_argb1555 += 32; - dst_argb += 64; - } -} - -void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, - uint8_t* dst_argb, - int width) { - int x; - v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; - v8u16 reg0, reg1, reg2, reg3, reg4, reg5; - v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); - v8u16 const_0x7E0 = 
(v8u16)__msa_fill_h(0x7E0); - v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); - - for (x = 0; x < width; x += 16) { - src0 = (v8u16)__msa_ld_h((void*)src_rgb565, 0); - src1 = (v8u16)__msa_ld_h((void*)src_rgb565, 16); - vec0 = src0 & const_0x1F; - vec1 = src0 & const_0x7E0; - vec2 = src0 & const_0xF800; - vec3 = src1 & const_0x1F; - vec4 = src1 & const_0x7E0; - vec5 = src1 & const_0xF800; - reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); - reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3); - reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8); - reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3); - reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3); - reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8); - reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2); - reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9); - reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13); - reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2); - reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9); - reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13); - res0 = (v16u8)__msa_ilvev_b((v16i8)reg2, (v16i8)reg0); - res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg1); - res2 = (v16u8)__msa_ilvev_b((v16i8)reg5, (v16i8)reg3); - res3 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg4); - dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); - dst2 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res2); - dst3 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res2); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_rgb565 += 32; - dst_argb += 64; - } -} - -void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, src2; - v16u8 vec0, vec1, vec2; - v16u8 dst0, dst1, dst2, dst3; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_rgb24, 0); - src1 = (v16u8)__msa_ld_b((void*)src_rgb24, 16); - src2 = (v16u8)__msa_ld_b((void*)src_rgb24, 32); - vec0 = 
(v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); - vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); - vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); - dst0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec0); - dst2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec1); - dst3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec2); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_rgb24 += 48; - dst_argb += 64; - } -} - -void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) { - int x; - v16u8 src0, src1, src2; - v16u8 vec0, vec1, vec2; - v16u8 dst0, dst1, dst2, dst3; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_raw, 0); - src1 = (v16u8)__msa_ld_b((void*)src_raw, 16); - src2 = (v16u8)__msa_ld_b((void*)src_raw, 32); - vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); - vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); - vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); - dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec0); - dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec1); - dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec2); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_raw += 48; - dst_argb += 64; - } -} - -void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width) { - int x; - v16u8 src0, src1, tmp0, tmp1, tmpb, tmpg, tmpr; - v16u8 reg0, reg1, reg2, dst; - v8i16 tmpr_l, tmpr_r, tmpg_l, tmpg_r, tmpb_l, tmpb_r; - v8i16 res0, res1; - v8i16 const_66 = (v8i16)__msa_ldi_h(66); - v8i16 const_129 = (v8i16)__msa_ldi_h(129); - v8i16 const_25 = (v8i16)__msa_ldi_h(25); - v8u16 const_1080 = (v8u16)__msa_fill_h(0x1080); - v16u8 zero = (v16u8)__msa_ldi_b(0); - - 
for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb1555, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb1555, 16); - tmp0 = (v16u8)__msa_pckev_b(src1, src0); - tmp1 = (v16u8)__msa_pckod_b(src1, src0); - tmpb = (v16u8)__msa_andi_b(tmp0, 0x1F); - tmpg = (v16u8)__msa_srli_b(tmp0, 5); - reg0 = (v16u8)__msa_andi_b(tmp1, 0x03); - reg0 = (v16u8)__msa_slli_b(reg0, 3); - tmpg = (v16u8)__msa_or_v(tmpg, reg0); - reg1 = (v16u8)__msa_andi_b(tmp1, 0x7C); - tmpr = (v16u8)__msa_srli_b(reg1, 2); - reg0 = (v16u8)__msa_slli_b(tmpb, 3); - reg1 = (v16u8)__msa_slli_b(tmpg, 3); - reg2 = (v16u8)__msa_slli_b(tmpr, 3); - tmpb = (v16u8)__msa_srli_b(tmpb, 2); - tmpg = (v16u8)__msa_srli_b(tmpg, 2); - tmpr = (v16u8)__msa_srli_b(tmpr, 2); - tmpb = (v16u8)__msa_or_v(reg0, tmpb); - tmpg = (v16u8)__msa_or_v(reg1, tmpg); - tmpr = (v16u8)__msa_or_v(reg2, tmpr); - tmpb_r = (v8i16)__msa_ilvr_b(zero, tmpb); - tmpb_l = (v8i16)__msa_ilvl_b(zero, tmpb); - tmpg_r = (v8i16)__msa_ilvr_b(zero, tmpg); - tmpg_l = (v8i16)__msa_ilvl_b(zero, tmpg); - tmpr_r = (v8i16)__msa_ilvr_b(zero, tmpr); - tmpr_l = (v8i16)__msa_ilvl_b(zero, tmpr); - res0 = const_1080 + const_25 * tmpb_r; - res1 = const_1080 + const_25 * tmpb_l; - res0 += const_129 * tmpg_r; - res1 += const_129 * tmpg_l; - res0 += const_66 * tmpr_r; - res1 += const_66 * tmpr_l; - dst = (v16u8)__msa_pckod_b(res1, res0); - ST_UB(dst, dst_y); - src_argb1555 += 32; - dst_y += 16; - } -} - -void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, tmp0, tmp1, tmpb, tmpg, tmpr; - v16u8 reg0, reg1, dst; - v8i16 tmpr_l, tmpr_r, tmpg_l, tmpg_r, tmpb_l, tmpb_r; - v8i16 res0, res1; - v8i16 const_66 = (v8i16)__msa_ldi_h(66); - v8i16 const_129 = (v8i16)__msa_ldi_h(129); - v8i16 const_25 = (v8i16)__msa_ldi_h(25); - v8i16 const_1080 = (v8i16)__msa_fill_h(0x1080); - v16u8 zero = __msa_ldi_b(0); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_rgb565, 0); - src1 = 
(v16u8)__msa_ld_b((void*)src_rgb565, 16); - tmp0 = (v16u8)__msa_pckev_b(src1, src0); - tmp1 = (v16u8)__msa_pckod_b(src1, src0); - tmpb = (v16u8)__msa_andi_b(tmp0, 0x1F); - tmpr = (v16u8)__msa_andi_b(tmp1, 0xF8); - reg1 = (v16u8)__msa_andi_b(tmp1, 0x07); - reg0 = (v16u8)__msa_srli_b(tmp0, 5); - reg1 = (v16u8)__msa_slli_b(reg1, 3); - tmpg = (v16u8)__msa_or_v(reg1, reg0); - reg0 = (v16u8)__msa_slli_b(tmpb, 3); - reg1 = (v16u8)__msa_srli_b(tmpb, 2); - tmpb = (v16u8)__msa_or_v(reg1, reg0); - reg0 = (v16u8)__msa_slli_b(tmpg, 2); - reg1 = (v16u8)__msa_srli_b(tmpg, 4); - tmpg = (v16u8)__msa_or_v(reg1, reg0); - reg0 = (v16u8)__msa_srli_b(tmpr, 5); - tmpr = (v16u8)__msa_or_v(tmpr, reg0); - tmpb_r = (v8i16)__msa_ilvr_b(zero, tmpb); - tmpb_l = (v8i16)__msa_ilvl_b(zero, tmpb); - tmpg_r = (v8i16)__msa_ilvr_b(zero, tmpg); - tmpg_l = (v8i16)__msa_ilvl_b(zero, tmpg); - tmpr_r = (v8i16)__msa_ilvr_b(zero, tmpr); - tmpr_l = (v8i16)__msa_ilvl_b(zero, tmpr); - res0 = const_1080 + const_25 * tmpb_r; - res1 = const_1080 + const_25 * tmpb_l; - res0 += const_129 * tmpg_r; - res1 += const_129 * tmpg_l; - res0 += const_66 * tmpr_r; - res1 += const_66 * tmpr_l; - dst = (v16u8)__msa_pckod_b(res1, res0); - ST_UB(dst, dst_y); - src_rgb565 += 32; - dst_y += 16; - } -} - -void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; - v8u16 vec0, vec1, vec2, vec3; - v8u16 const_0x8119 = (v8u16)__msa_fill_h(0x8119); - v8u16 const_0x42 = (v8u16)__msa_fill_h(0x42); - v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); - v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; - v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, - 18, 19, 20, 21, 21, 22, 23, 24}; - v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; - v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16}; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - 
src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); - reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); - reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); - reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); - vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); - vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); - vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8119); - vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8119); - vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x42); - vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x42); - vec0 += const_0x1080; - vec1 += const_0x1080; - vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); - vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_y); - src_argb += 48; - dst_y += 16; - } -} - -void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; - v8u16 vec0, vec1, vec2, vec3; - v8u16 const_0x8142 = (v8u16)__msa_fill_h(0x8142); - v8u16 const_0x19 = (v8u16)__msa_fill_h(0x19); - v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); - v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; - v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, - 18, 19, 20, 21, 21, 22, 23, 24}; - v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; - v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16}; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); - reg1 = (v16u8)__msa_vshf_b(mask1, 
(v16i8)src1, (v16i8)src0); - reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); - reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); - vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); - vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); - vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8142); - vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8142); - vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x19); - vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x19); - vec0 += const_0x1080; - vec1 += const_0x1080; - vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); - vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_y); - src_argb += 48; - dst_y += 16; - } -} - -void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint16_t* s = (const uint16_t*)src_argb1555; - const uint16_t* t = (const uint16_t*)(src_argb1555 + src_stride_argb1555); - int64_t res0, res1; - v16u8 src0, src1, src2, src3, dst; - v16u8 tmp0, tmp1, tmp2, tmp3; - v16u8 reg0, reg1, reg2, reg3; - v16u8 tmpb, tmpg, tmpr, nexb, nexg, nexr; - v8i16 const_112 = (v8i16)__msa_ldi_h(0x38); - v8i16 const_74 = (v8i16)__msa_ldi_h(0x25); - v8i16 const_38 = (v8i16)__msa_ldi_h(0x13); - v8i16 const_94 = (v8i16)__msa_ldi_h(0x2F); - v8i16 const_18 = (v8i16)__msa_ldi_h(0x09); - v8u16 const_8080 = (v8u16)__msa_fill_h(0x8080); - - for (x = 0; x < width; x += 16) { - src0 = (v8u16)__msa_ld_b((void*)s, 0); - src1 = (v8u16)__msa_ld_b((void*)s, 16); - src2 = (v8u16)__msa_ld_b((void*)t, 0); - src3 = (v8u16)__msa_ld_b((void*)t, 16); - tmp0 = (v16u8)__msa_pckev_b(src1, src0); - tmp1 = (v16u8)__msa_pckod_b(src1, src0); - tmp2 = (v16u8)__msa_pckev_b(src3, src2); - tmp3 = (v16u8)__msa_pckod_b(src3, src2); - tmpb = 
(v16u8)__msa_andi_b(tmp0, 0x1F); - nexb = (v16u8)__msa_andi_b(tmp2, 0x1F); - tmpg = (v16u8)__msa_srli_b(tmp0, 5); - nexg = (v16u8)__msa_srli_b(tmp2, 5); - reg0 = (v16u8)__msa_andi_b(tmp1, 0x03); - reg2 = (v16u8)__msa_andi_b(tmp3, 0x03); - reg0 = (v16u8)__msa_slli_b(reg0, 3); - reg2 = (v16u8)__msa_slli_b(reg2, 3); - tmpg = (v16u8)__msa_or_v(tmpg, reg0); - nexg = (v16u8)__msa_or_v(nexg, reg2); - reg1 = (v16u8)__msa_andi_b(tmp1, 0x7C); - reg3 = (v16u8)__msa_andi_b(tmp3, 0x7C); - tmpr = (v16u8)__msa_srli_b(reg1, 2); - nexr = (v16u8)__msa_srli_b(reg3, 2); - reg0 = (v16u8)__msa_slli_b(tmpb, 3); - reg1 = (v16u8)__msa_slli_b(tmpg, 3); - reg2 = (v16u8)__msa_slli_b(tmpr, 3); - tmpb = (v16u8)__msa_srli_b(tmpb, 2); - tmpg = (v16u8)__msa_srli_b(tmpg, 2); - tmpr = (v16u8)__msa_srli_b(tmpr, 2); - tmpb = (v16u8)__msa_or_v(reg0, tmpb); - tmpg = (v16u8)__msa_or_v(reg1, tmpg); - tmpr = (v16u8)__msa_or_v(reg2, tmpr); - reg0 = (v16u8)__msa_slli_b(nexb, 3); - reg1 = (v16u8)__msa_slli_b(nexg, 3); - reg2 = (v16u8)__msa_slli_b(nexr, 3); - nexb = (v16u8)__msa_srli_b(nexb, 2); - nexg = (v16u8)__msa_srli_b(nexg, 2); - nexr = (v16u8)__msa_srli_b(nexr, 2); - nexb = (v16u8)__msa_or_v(reg0, nexb); - nexg = (v16u8)__msa_or_v(reg1, nexg); - nexr = (v16u8)__msa_or_v(reg2, nexr); - RGBTOUV(tmpb, tmpg, tmpr, nexb, nexg, nexr, dst); - res0 = __msa_copy_u_d((v2i64)dst, 0); - res1 = __msa_copy_u_d((v2i64)dst, 1); - SD(res0, dst_u); - SD(res1, dst_v); - s += 16; - t += 16; - dst_u += 8; - dst_v += 8; - } -} - -void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint16_t* s = (const uint16_t*)src_rgb565; - const uint16_t* t = (const uint16_t*)(src_rgb565 + src_stride_rgb565); - int64_t res0, res1; - v16u8 src0, src1, src2, src3, dst; - v16u8 tmp0, tmp1, tmp2, tmp3; - v16u8 reg0, reg1, reg2, reg3; - v16u8 tmpb, tmpg, tmpr, nexb, nexg, nexr; - v8i16 const_112 = (v8i16)__msa_ldi_h(0x38); - v8i16 const_74 = 
(v8i16)__msa_ldi_h(0x25); - v8i16 const_38 = (v8i16)__msa_ldi_h(0x13); - v8i16 const_94 = (v8i16)__msa_ldi_h(0x2F); - v8i16 const_18 = (v8i16)__msa_ldi_h(0x09); - v8u16 const_8080 = (v8u16)__msa_fill_h(0x8080); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)s, 0); - src1 = (v16u8)__msa_ld_b((void*)s, 16); - src2 = (v16u8)__msa_ld_b((void*)t, 0); - src3 = (v16u8)__msa_ld_b((void*)t, 16); - tmp0 = (v16u8)__msa_pckev_b(src1, src0); - tmp1 = (v16u8)__msa_pckod_b(src1, src0); - tmp2 = (v16u8)__msa_pckev_b(src3, src2); - tmp3 = (v16u8)__msa_pckod_b(src3, src2); - tmpb = (v16u8)__msa_andi_b(tmp0, 0x1F); - tmpr = (v16u8)__msa_andi_b(tmp1, 0xF8); - nexb = (v16u8)__msa_andi_b(tmp2, 0x1F); - nexr = (v16u8)__msa_andi_b(tmp3, 0xF8); - reg1 = (v16u8)__msa_andi_b(tmp1, 0x07); - reg3 = (v16u8)__msa_andi_b(tmp3, 0x07); - reg0 = (v16u8)__msa_srli_b(tmp0, 5); - reg1 = (v16u8)__msa_slli_b(reg1, 3); - reg2 = (v16u8)__msa_srli_b(tmp2, 5); - reg3 = (v16u8)__msa_slli_b(reg3, 3); - tmpg = (v16u8)__msa_or_v(reg1, reg0); - nexg = (v16u8)__msa_or_v(reg2, reg3); - reg0 = (v16u8)__msa_slli_b(tmpb, 3); - reg1 = (v16u8)__msa_srli_b(tmpb, 2); - reg2 = (v16u8)__msa_slli_b(nexb, 3); - reg3 = (v16u8)__msa_srli_b(nexb, 2); - tmpb = (v16u8)__msa_or_v(reg1, reg0); - nexb = (v16u8)__msa_or_v(reg2, reg3); - reg0 = (v16u8)__msa_slli_b(tmpg, 2); - reg1 = (v16u8)__msa_srli_b(tmpg, 4); - reg2 = (v16u8)__msa_slli_b(nexg, 2); - reg3 = (v16u8)__msa_srli_b(nexg, 4); - tmpg = (v16u8)__msa_or_v(reg1, reg0); - nexg = (v16u8)__msa_or_v(reg2, reg3); - reg0 = (v16u8)__msa_srli_b(tmpr, 5); - reg2 = (v16u8)__msa_srli_b(nexr, 5); - tmpr = (v16u8)__msa_or_v(tmpr, reg0); - nexr = (v16u8)__msa_or_v(nexr, reg2); - RGBTOUV(tmpb, tmpg, tmpr, nexb, nexg, nexr, dst); - res0 = __msa_copy_u_d((v2i64)dst, 0); - res1 = __msa_copy_u_d((v2i64)dst, 1); - SD(res0, dst_u); - SD(res1, dst_v); - s += 16; - t += 16; - dst_u += 8; - dst_v += 8; - } -} - -void RGB24ToUVRow_MSA(const uint8_t* src_rgb, - int 
src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* s = src_rgb; - const uint8_t* t = src_rgb + src_stride_rgb; - int64_t res0, res1; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - v16u8 inp0, inp1, inp2, inp3, inp4, inp5; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8i16 reg0, reg1, reg2, reg3; - v16u8 dst0; - v8u16 const_0x70 = (v8u16)__msa_fill_h(0x38); - v8u16 const_0x4A = (v8u16)__msa_fill_h(0x25); - v8u16 const_0x26 = (v8u16)__msa_fill_h(0x13); - v8u16 const_0x5E = (v8u16)__msa_fill_h(0x2f); - v8u16 const_0x12 = (v8u16)__msa_fill_h(0x09); - v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); - v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001); - v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 16) { - inp0 = (v16u8)__msa_ld_b((void*)s, 0); - inp1 = (v16u8)__msa_ld_b((void*)s, 16); - inp2 = (v16u8)__msa_ld_b((void*)s, 32); - inp3 = (v16u8)__msa_ld_b((void*)t, 0); - inp4 = (v16u8)__msa_ld_b((void*)t, 16); - inp5 = (v16u8)__msa_ld_b((void*)t, 32); - src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); - src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); - src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); - src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); - src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); - src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); - src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); - src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); - src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); - src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); - src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); - src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); - src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); - src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, 
(v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); - vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); - vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); - vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); - vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); - vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); - vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); - vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); - vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); - vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); - vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); - vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); - vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); - reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); - reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); - reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); - reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); - reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); - reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); - reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); - reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); - reg0 += const_0x0001; - reg1 += const_0x0001; - reg2 += const_0x0001; - reg3 += const_0x0001; - reg0 = __msa_srai_h((v8i16)reg0, 1); - reg1 = __msa_srai_h((v8i16)reg1, 1); - reg2 = __msa_srai_h((v8i16)reg2, 1); - reg3 = __msa_srai_h((v8i16)reg3, 1); - vec4 = (v8u16)__msa_pckev_h(reg1, reg0); - vec5 = (v8u16)__msa_pckev_h(reg3, reg2); - vec6 = (v8u16)__msa_pckod_h(reg1, reg0); - vec7 = (v8u16)__msa_pckod_h(reg3, reg2); - vec0 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); - vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); - vec2 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); - vec3 = vec0 * const_0x70; - vec4 = vec1 * const_0x4A; - vec5 = vec2 * 
const_0x26; - vec2 *= const_0x70; - vec1 *= const_0x5E; - vec0 *= const_0x12; - reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); - reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); - reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); - reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); - reg0 += reg1; - reg2 += reg3; - reg0 = __msa_srai_h(reg0, 8); - reg2 = __msa_srai_h(reg2, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); - res0 = __msa_copy_u_d((v2i64)dst0, 0); - res1 = __msa_copy_u_d((v2i64)dst0, 1); - SD(res0, dst_u); - SD(res1, dst_v); - t += 48; - s += 48; - dst_u += 8; - dst_v += 8; - } -} - -void RAWToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* s = src_rgb; - const uint8_t* t = src_rgb + src_stride_rgb; - int64_t res0, res1; - v16u8 inp0, inp1, inp2, inp3, inp4, inp5; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8i16 reg0, reg1, reg2, reg3; - v16u8 dst0; - v8u16 const_0x70 = (v8u16)__msa_fill_h(0x38); - v8u16 const_0x4A = (v8u16)__msa_fill_h(0x25); - v8u16 const_0x26 = (v8u16)__msa_fill_h(0x13); - v8u16 const_0x5E = (v8u16)__msa_fill_h(0x2f); - v8u16 const_0x12 = (v8u16)__msa_fill_h(0x09); - v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); - v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001); - v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 16) { - inp0 = (v16u8)__msa_ld_b((void*)s, 0); - inp1 = (v16u8)__msa_ld_b((void*)s, 16); - inp2 = (v16u8)__msa_ld_b((void*)s, 32); - inp3 = (v16u8)__msa_ld_b((void*)t, 0); - inp4 = (v16u8)__msa_ld_b((void*)t, 16); - inp5 = (v16u8)__msa_ld_b((void*)t, 32); - src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); - src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); - src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); - src6 = (v16u8)__msa_sldi_b((v16i8)inp5, 
(v16i8)inp4, 8); - src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); - src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); - src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); - src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); - src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); - src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); - src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); - src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); - src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); - src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); - vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); - vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); - vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); - vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); - vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); - vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); - vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); - vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); - vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); - vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); - vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); - vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); - reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); - reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); - reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); - reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); - reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); - reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); - reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); - reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, 
(v2i64)vec6); - reg0 += const_0x0001; - reg1 += const_0x0001; - reg2 += const_0x0001; - reg3 += const_0x0001; - reg0 = __msa_srai_h(reg0, 1); - reg1 = __msa_srai_h(reg1, 1); - reg2 = __msa_srai_h(reg2, 1); - reg3 = __msa_srai_h(reg3, 1); - vec4 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec5 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - vec6 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); - vec7 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); - vec0 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); - vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); - vec2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); - vec3 = vec0 * const_0x70; - vec4 = vec1 * const_0x4A; - vec5 = vec2 * const_0x26; - vec2 *= const_0x70; - vec1 *= const_0x5E; - vec0 *= const_0x12; - reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); - reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); - reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); - reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); - reg0 += reg1; - reg2 += reg3; - reg0 = __msa_srai_h(reg0, 8); - reg2 = __msa_srai_h(reg2, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); - res0 = __msa_copy_u_d((v2i64)dst0, 0); - res1 = __msa_copy_u_d((v2i64)dst0, 1); - SD(res0, dst_u); - SD(res1, dst_v); - t += 48; - s += 48; - dst_u += 8; - dst_v += 8; - } -} - -void NV12ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint64_t val0, val1; - v16u8 src0, src1, res0, res1, dst0, dst1; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v16u8 zero = {0}; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) 
{ - val0 = LD(src_y); - val1 = LD(src_uv); - src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); - src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); - res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); - dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); - ST_UB2(dst0, dst1, dst_argb, 16); - src_y += 8; - src_uv += 8; - dst_argb += 32; - } -} - -void NV12ToRGB565Row_MSA(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint64_t val0, val1; - v16u8 src0, src1, dst0; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v8i16 const_0x80 = __msa_ldi_h(0x80); - v16u8 zero = {0}; - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - val0 = LD(src_y); - val1 = LD(src_uv); - src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); - src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - vec0 = vec0 >> 3; - vec1 = (vec1 >> 2) << 5; - vec2 = (vec2 >> 3) << 11; - dst0 = (v16u8)(vec0 | vec1 | vec2); - ST_UB(dst0, dst_rgb565); - src_y += 8; - src_uv += 8; - dst_rgb565 += 16; - } -} - -void NV21ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - uint64_t val0, val1; - v16u8 src0, src1, res0, res1, dst0, dst1; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v16u8 zero = {0}; - v16i8 
shuffler = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - val0 = LD(src_y); - val1 = LD(src_vu); - src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); - src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); - src1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); - YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); - res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); - dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); - ST_UB2(dst0, dst1, dst_argb, 16); - src_y += 8; - src_vu += 8; - dst_argb += 32; - } -} - -void SobelRow_MSA(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3; - v16i8 mask0 = {0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16}; - v16i8 const_0x4 = __msa_ldi_b(0x4); - v16i8 mask1 = mask0 + const_0x4; - v16i8 mask2 = mask1 + const_0x4; - v16i8 mask3 = mask2 + const_0x4; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); - src1 = (v16u8)__msa_ld_b((void*)src_sobely, 0); - vec0 = __msa_adds_u_b(src0, src1); - dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0); - dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0); - dst2 = (v16u8)__msa_vshf_b(mask2, (v16i8)alpha, (v16i8)vec0); - dst3 = (v16u8)__msa_vshf_b(mask3, (v16i8)alpha, (v16i8)vec0); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_sobelx += 16; - src_sobely += 16; - dst_argb += 64; - } -} - -void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, - const uint8_t* 
src_sobely, - uint8_t* dst_y, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - - for (x = 0; x < width; x += 32) { - src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); - src1 = (v16u8)__msa_ld_b((void*)src_sobelx, 16); - src2 = (v16u8)__msa_ld_b((void*)src_sobely, 0); - src3 = (v16u8)__msa_ld_b((void*)src_sobely, 16); - dst0 = __msa_adds_u_b(src0, src2); - dst1 = __msa_adds_u_b(src1, src3); - ST_UB2(dst0, dst1, dst_y, 16); - src_sobelx += 32; - src_sobely += 32; - dst_y += 32; - } -} - -void SobelXYRow_MSA(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, vec0, vec1, vec2; - v16u8 reg0, reg1, dst0, dst1, dst2, dst3; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); - src1 = (v16u8)__msa_ld_b((void*)src_sobely, 0); - vec0 = __msa_adds_u_b(src0, src1); - vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1); - vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1); - reg0 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)vec0); - reg1 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)vec0); - dst0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)vec1); - dst1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)vec1); - dst2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)vec2); - dst3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)vec2); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_sobelx += 16; - src_sobely += 16; - dst_argb += 64; - } -} - -void ARGBToYJRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0; - v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D); - v16u8 const_0x4D = (v16u8)__msa_fill_h(0x4D); - v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 
48); - ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8, - dst0); - ST_UB(dst0, dst_y); - src_argb += 64; - dst_y += 16; - } -} - -void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0; - v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200); - v16u8 const_0x1981 = (v16u8)__msa_fill_h(0x1981); - v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8, - dst0); - ST_UB(dst0, dst_y); - src_argb += 64; - dst_y += 16; - } -} - -void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0; - v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142); - v16u8 const_0x19 = (v16u8)__msa_fill_h(0x19); - v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8, - dst0); - ST_UB(dst0, dst_y); - src_argb += 64; - dst_y += 16; - } -} - -void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width) { - int x; - v16u8 src0, src1, src2, src3, dst0; - v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900); - v16u8 const_0x4281 = (v16u8)__msa_fill_h(0x4281); - v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - ARGBTOY(src0, 
src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8, - dst0); - ST_UB(dst0, dst_y); - src_argb += 64; - dst_y += 16; - } -} - -void ARGBToUVJRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* s = src_rgb; - const uint8_t* t = src_rgb + src_stride_rgb; - v8u16 src0, src1, src2, src3, src4, src5, src6, src7; - v8u16 vec0, vec1, vec2, vec3; - v8u16 dst0, dst1, dst2, dst3; - v16u8 zero = {0}; - v8i16 shuffler0 = {0, 3, 4, 7, 8, 11, 12, 15}; - v8i16 shuffler1 = {1, 2, 5, 6, 9, 10, 13, 14}; - v8i16 shuffler2 = {2, 3, 6, 7, 10, 11, 14, 15}; - v8i16 shuffler3 = {0, 1, 4, 5, 8, 9, 12, 13}; - v8u16 const_0x0000003f = (v8u16)__msa_fill_w(0x0000003f); - v4u32 const_0x00008080 = (v8u16)__msa_fill_w(0x00008080); - v8u16 const_0x0015002a = (v8u16)__msa_fill_w(0x0015002a); - v8u16 const_0x0035000a = (v8u16)__msa_fill_w(0x0035000a); - v4i32 shift = __msa_fill_w(0x00000008); - - for (x = 0; x < width; x += 32) { - src1 = __msa_ld_b((void*)s, 0); - src3 = __msa_ld_b((void*)s, 16); - src5 = __msa_ld_b((void*)t, 0); - src7 = __msa_ld_b((void*)t, 16); - src0 = __msa_ilvr_b(zero, src1); - src1 = __msa_ilvl_b(zero, src1); - src2 = __msa_ilvr_b(zero, src3); - src3 = __msa_ilvl_b(zero, src3); - src4 = __msa_ilvr_b(zero, src5); - src5 = __msa_ilvl_b(zero, src5); - src6 = __msa_ilvr_b(zero, src7); - src7 = __msa_ilvl_b(zero, src7); - src0 += src4; - src1 += src5; - src2 += src6; - src3 += src7; - src4 = __msa_ilvev_d(src1, src0); - src5 = __msa_ilvod_d(src1, src0); - src6 = __msa_ilvev_d(src3, src2); - src7 = __msa_ilvod_d(src3, src2); - vec0 = __msa_aver_u_h(src4, src5); - vec1 = __msa_aver_u_h(src6, src7); - - src1 = __msa_ld_b((void*)s, 32); - src3 = __msa_ld_b((void*)s, 48); - src5 = __msa_ld_b((void*)t, 32); - src7 = __msa_ld_b((void*)t, 48); - src0 = __msa_ilvr_b(zero, src1); - src1 = __msa_ilvl_b(zero, src1); - src2 = __msa_ilvr_b(zero, src3); - src3 = __msa_ilvl_b(zero, src3); - src4 = 
__msa_ilvr_b(zero, src5); - src5 = __msa_ilvl_b(zero, src5); - src6 = __msa_ilvr_b(zero, src7); - src7 = __msa_ilvl_b(zero, src7); - src0 += src4; - src1 += src5; - src2 += src6; - src3 += src7; - src4 = __msa_ilvev_d(src1, src0); - src5 = __msa_ilvod_d(src1, src0); - src6 = __msa_ilvev_d(src3, src2); - src7 = __msa_ilvod_d(src3, src2); - vec2 = __msa_aver_u_h(src4, src5); - vec3 = __msa_aver_u_h(src6, src7); - ARGBTOUV(vec0, vec1, vec2, vec3, const_0x0000003f, const_0x00008080, - const_0x0015002a, const_0x0035000a, shuffler0, shuffler1, - shuffler2, shuffler3, shift, dst0, dst1); - - src1 = __msa_ld_b((void*)s, 64); - src3 = __msa_ld_b((void*)s, 80); - src5 = __msa_ld_b((void*)t, 64); - src7 = __msa_ld_b((void*)t, 80); - src0 = __msa_ilvr_b(zero, src1); - src1 = __msa_ilvl_b(zero, src1); - src2 = __msa_ilvr_b(zero, src3); - src3 = __msa_ilvl_b(zero, src3); - src4 = __msa_ilvr_b(zero, src5); - src5 = __msa_ilvl_b(zero, src5); - src6 = __msa_ilvr_b(zero, src7); - src7 = __msa_ilvl_b(zero, src7); - src0 += src4; - src1 += src5; - src2 += src6; - src3 += src7; - src4 = __msa_ilvev_d(src1, src0); - src5 = __msa_ilvod_d(src1, src0); - src6 = __msa_ilvev_d(src3, src2); - src7 = __msa_ilvod_d(src3, src2); - vec0 = __msa_aver_u_h(src4, src5); - vec1 = __msa_aver_u_h(src6, src7); - - src1 = __msa_ld_b((void*)s, 96); - src3 = __msa_ld_b((void*)s, 112); - src5 = __msa_ld_b((void*)t, 96); - src7 = __msa_ld_b((void*)t, 112); - src0 = __msa_ilvr_b(zero, src1); - src1 = __msa_ilvl_b(zero, src1); - src2 = __msa_ilvr_b(zero, src3); - src3 = __msa_ilvl_b(zero, src3); - src4 = __msa_ilvr_b(zero, src5); - src5 = __msa_ilvl_b(zero, src5); - src6 = __msa_ilvr_b(zero, src7); - src7 = __msa_ilvl_b(zero, src7); - src0 += src4; - src1 += src5; - src2 += src6; - src3 += src7; - src4 = __msa_ilvev_d(src1, src0); - src5 = __msa_ilvod_d(src1, src0); - src6 = __msa_ilvev_d(src3, src2); - src7 = __msa_ilvod_d(src3, src2); - vec2 = __msa_aver_u_h(src4, src5); - vec3 = __msa_aver_u_h(src6, src7); - 
ARGBTOUV(vec0, vec1, vec2, vec3, const_0x0000003f, const_0x00008080, - const_0x0015002a, const_0x0035000a, shuffler0, shuffler1, - shuffler2, shuffler3, shift, dst2, dst3); - - dst0 = (v8u16)__msa_pckev_b(dst2, dst0); - dst1 = (v8u16)__msa_pckev_b(dst3, dst1); - ST_UB(dst0, dst_u); - ST_UB(dst1, dst_v); - s += 128; - t += 128; - dst_v += 16; - dst_u += 16; - } -} - -void BGRAToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* s = src_rgb; - const uint8_t* t = src_rgb + src_stride_rgb; - const uint8_t unused = 0xf; - v8u16 src0, src1, src2, src3; - v16u8 dst0, dst1; - v8i16 shuffler0 = {1, unused, 5, unused, 9, unused, 13, unused}; - v8i16 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15}; - v8i16 shuffler2 = {3, unused, 7, unused, 11, unused, 15, unused}; - v8i16 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14}; - v8u16 const_0x09002f = (v8u16)__msa_fill_w(0x09002f); - v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038); - v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013); - v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080); - v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001); - - for (x = 0; x < width; x += 16) { - READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001); - ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038, - const_0x250013, const_0x008080, shuffler0, shuffler1, shuffler2, - shuffler3, dst0, dst1); - *((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0); - *((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0); - s += 64; - t += 64; - dst_u += 8; - dst_v += 8; - } -} - -void ABGRToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* s = src_rgb; - const uint8_t* t = src_rgb + src_stride_rgb; - const uint8_t unused = 0xf; - v8u16 src0, src1, src2, src3; - v16u8 dst0, dst1; - v8i16 shuffler0 = {0, unused, 4, unused, 8, unused, 12, unused}; - v8i16 shuffler1 = {1, 2, 5, 6, 9, 10, 13, 14}; - v8i16 
shuffler2 = {2, unused, 6, unused, 10, unused, 14, unused}; - v8i16 shuffler3 = {0, 1, 4, 5, 8, 9, 12, 13}; - v8u16 const_0x09002f = (v8u16)__msa_fill_w(0x09002f); - v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038); - v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013); - v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080); - v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001); - - for (x = 0; x < width; x += 16) { - READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001); - ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038, - const_0x250013, const_0x008080, shuffler0, shuffler1, shuffler2, - shuffler3, dst0, dst1); - *((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0); - *((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0); - s += 64; - t += 64; - dst_u += 8; - dst_v += 8; - } -} - -void RGBAToUVRow_MSA(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - const uint8_t* s = src_rgb; - const uint8_t* t = src_rgb + src_stride_rgb; - const uint8_t unused = 0xf; - v8u16 src0, src1, src2, src3; - v16u8 dst0, dst1; - v8i16 shuffler0 = {3, unused, 7, unused, 11, unused, 15, unused}; - v8i16 shuffler1 = {2, 1, 6, 5, 10, 9, 14, 13}; - v8i16 shuffler2 = {1, unused, 5, unused, 9, unused, 13, unused}; - v8i16 shuffler3 = {3, 2, 7, 6, 11, 10, 15, 14}; - v8u16 const_0x09002f = (v8u16)__msa_fill_w(0x09002f); - v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038); - v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013); - v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080); - v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001); - - for (x = 0; x < width; x += 16) { - READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001); - ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038, - const_0x250013, const_0x008080, shuffler0, shuffler1, shuffler2, - shuffler3, dst0, dst1); - *((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0); - *((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0); - s += 64; - t += 64; - dst_u 
+= 8; - dst_v += 8; - } -} - -void I444ToARGBRow_MSA(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2, dst0, dst1; - v8i16 vec0, vec1, vec2; - v4i32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; - v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8i16 zero = {0}; - v4i32 const_0x80 = __msa_fill_w(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - - for (x = 0; x < width; x += 8) { - READI444(src_y, src_u, src_v, src0, src1, src2); - vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); - reg0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); - reg1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); - reg0 *= vec_yg; - reg1 *= vec_yg; - reg0 = __msa_srai_w(reg0, 16); - reg1 = __msa_srai_w(reg1, 16); - reg0 += vec_yb; - reg1 += vec_yb; - vec0 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); - vec1 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src2); - reg6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); - reg7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); - reg8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); - reg9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); - reg6 -= const_0x80; - reg7 -= const_0x80; - reg8 -= const_0x80; - reg9 -= const_0x80; - tmp0 = reg0 + reg6 * vec_ub; - tmp1 = reg1 + reg7 * vec_ub; - tmp2 = reg0 + reg8 * vec_vr; - tmp3 = reg1 + reg9 * vec_vr; - tmp4 = reg6 * vec_ug; - tmp5 = reg7 * vec_ug; - tmp4 += reg8 * vec_vg; - tmp5 += reg9 * vec_vg; - tmp4 = reg0 - tmp4; - tmp5 = reg1 - tmp5; - reg0 = __msa_srai_w(tmp0, 6); - reg1 = __msa_srai_w(tmp1, 6); - reg2 = __msa_srai_w(tmp2, 6); - reg3 = __msa_srai_w(tmp3, 6); - reg4 = __msa_srai_w(tmp4, 6); - reg5 = __msa_srai_w(tmp5, 6); - CLIP_0TO255(reg0, reg1, reg2, reg3, reg4, reg5); - vec0 = 
(v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); - vec2 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - vec0 = (v8u16)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); - vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2); - dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0); - dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0); - ST_UB2(dst0, dst1, dst_argb, 16); - src_y += 8; - src_u += 8; - src_v += 8; - dst_argb += 32; - } -} - -// TODO - respect YuvConstants -void I400ToARGBRow_MSA(const uint8_t* src_y, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; -#if defined(__aarch64__) || defined(__arm__) - int ygb = yuvconstants->kUVBiasBGR[3]; - int yg = yuvconstants->kYToRgb[1]; -#else - int ygb = yuvconstants->kYBiasToRgb[0]; - int yg = yuvconstants->kYToRgb[0]; -#endif - v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3; - v8i16 vec0, vec1; - v4i32 reg0, reg1, reg2, reg3; - v4i32 vec_yg = __msa_fill_w(yg); - v8i16 vec_ygb = __msa_fill_h(ygb); - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8i16 max = __msa_ldi_h(0xFF); - v8i16 zero = {0}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_y, 0); - vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); - vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); - reg0 = (v4i32)__msa_ilvr_h(zero, vec0); - reg1 = (v4i32)__msa_ilvl_h(zero, vec0); - reg2 = (v4i32)__msa_ilvr_h(zero, vec1); - reg3 = (v4i32)__msa_ilvl_h(zero, vec1); - reg0 *= vec_yg; - reg1 *= vec_yg; - reg2 *= vec_yg; - reg3 *= vec_yg; - reg0 = __msa_srai_w(reg0, 16); - reg1 = __msa_srai_w(reg1, 16); - reg2 = __msa_srai_w(reg2, 16); - reg3 = __msa_srai_w(reg3, 16); - vec0 = (v8i16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8i16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - vec0 += vec_ygb; - vec1 += vec_ygb; - vec0 = __msa_srai_h(vec0, 6); - vec1 = __msa_srai_h(vec1, 6); - vec0 = __msa_maxi_s_h(vec0, 0); - vec1 = 
__msa_maxi_s_h(vec1, 0); - vec0 = __msa_min_s_h(max, vec0); - vec1 = __msa_min_s_h(max, vec1); - res0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - res1 = (v16u8)__msa_ilvr_b((v16i8)res0, (v16i8)res0); - res2 = (v16u8)__msa_ilvl_b((v16i8)res0, (v16i8)res0); - res3 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)res0); - res4 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)res0); - dst0 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res1); - dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1); - dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2); - dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_y += 16; - dst_argb += 64; - } -} - -void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { - int x; - v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_y, 0); - vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0); - vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0); - vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0); - vec3 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)src0); - dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); - dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); - dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - src_y += 16; - dst_argb += 64; - } -} - -void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - v8i16 const_0x80 = __msa_ldi_h(0x80); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - 
vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_yuy2, 0); - src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); - src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); - YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - STOREARGB(vec0, vec1, vec2, alpha, dst_argb); - src_yuy2 += 16; - dst_argb += 32; - } -} - -void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - int x; - v16u8 src0, src1, src2; - v8i16 vec0, vec1, vec2; - v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb; - v4i32 vec_ubvr, vec_ugvg; - v8i16 const_0x80 = __msa_ldi_h(0x80); - v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); - - YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); - vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); - vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_uyvy, 0); - src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); - src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); - YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_yg, vec_yb, vec0, vec1, vec2); - STOREARGB(vec0, vec1, vec2, alpha, dst_argb); - src_uyvy += 16; - dst_argb += 32; - } -} - -void InterpolateRow_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int width, - int32_t source_y_fraction) { - int32_t y1_fraction = source_y_fraction; - int32_t y0_fraction = 256 - y1_fraction; - uint16_t y_fractions; - const uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + src_stride; - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - v8u16 vec0, vec1, vec2, vec3, y_frac; - - if (0 == y1_fraction) { - memcpy(dst_ptr, src_ptr, width); - return; - } - - if (128 == y1_fraction) { - for (x = 0; x < width; x += 32) { - src0 = (v16u8)__msa_ld_b((void*)s, 0); - src1 = (v16u8)__msa_ld_b((void*)s, 16); - src2 
= (v16u8)__msa_ld_b((void*)t, 0); - src3 = (v16u8)__msa_ld_b((void*)t, 16); - dst0 = __msa_aver_u_b(src0, src2); - dst1 = __msa_aver_u_b(src1, src3); - ST_UB2(dst0, dst1, dst_ptr, 16); - s += 32; - t += 32; - dst_ptr += 32; - } - return; - } - - y_fractions = (uint16_t)(y0_fraction + (y1_fraction << 8)); - y_frac = (v8u16)__msa_fill_h(y_fractions); - - for (x = 0; x < width; x += 32) { - src0 = (v16u8)__msa_ld_b((void*)s, 0); - src1 = (v16u8)__msa_ld_b((void*)s, 16); - src2 = (v16u8)__msa_ld_b((void*)t, 0); - src3 = (v16u8)__msa_ld_b((void*)t, 16); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); - vec0 = (v8u16)__msa_dotp_u_h((v16u8)vec0, (v16u8)y_frac); - vec1 = (v8u16)__msa_dotp_u_h((v16u8)vec1, (v16u8)y_frac); - vec2 = (v8u16)__msa_dotp_u_h((v16u8)vec2, (v16u8)y_frac); - vec3 = (v8u16)__msa_dotp_u_h((v16u8)vec3, (v16u8)y_frac); - vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 8); - vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 8); - vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 8); - vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - ST_UB2(dst0, dst1, dst_ptr, 16); - s += 32; - t += 32; - dst_ptr += 32; - } -} - -void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width) { - int x; - v4i32 dst0 = __builtin_msa_fill_w(v32); - - for (x = 0; x < width; x += 4) { - ST_UB(dst0, dst_argb); - dst_argb += 16; - } -} - -void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { - int x; - v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; - v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17}; - v16i8 shuffler1 = {8, 7, 12, 11, 10, 15, 14, 13, - 18, 17, 16, 21, 20, 19, 24, 23}; - v16i8 shuffler2 = {14, 19, 18, 17, 22, 21, 20, 25, - 24, 23, 28, 27, 26, 31, 
30, 29}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_raw, 0); - src1 = (v16u8)__msa_ld_b((void*)src_raw, 16); - src2 = (v16u8)__msa_ld_b((void*)src_raw, 32); - src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8); - src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); - dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src4, (v16i8)src3); - dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src2, (v16i8)src1); - ST_UB2(dst0, dst1, dst_rgb24, 16); - ST_UB(dst2, (dst_rgb24 + 32)); - src_raw += 48; - dst_rgb24 += 48; - } -} - -void MergeUVRow_MSA(const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uv, - int width) { - int x; - v16u8 src0, src1, dst0, dst1; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_u, 0); - src1 = (v16u8)__msa_ld_b((void*)src_v, 0); - dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0); - ST_UB2(dst0, dst1, dst_uv, 16); - src_u += 16; - src_v += 16; - dst_uv += 32; - } -} - -void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, - uint8_t* dst_a, - int width) { - int i; - v16u8 src0, src1, src2, src3, vec0, vec1, dst0; - - for (i = 0; i < width; i += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); - vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_a); - src_argb += 64; - dst_a += 16; - } -} - -void ARGBBlendRow_MSA(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v8u16 vec8, vec9, vec10, vec11, vec12, vec13; - v8u16 
const_256 = (v8u16)__msa_ldi_h(256); - v16u8 const_255 = (v16u8)__msa_ldi_b(255); - v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 8) { - src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); - src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); - vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); - vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); - vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2); - vec6 = (v8u16)__msa_ilvr_b(zero, (v16i8)src3); - vec7 = (v8u16)__msa_ilvl_b(zero, (v16i8)src3); - vec8 = (v8u16)__msa_fill_h(vec0[3]); - vec9 = (v8u16)__msa_fill_h(vec0[7]); - vec10 = (v8u16)__msa_fill_h(vec1[3]); - vec11 = (v8u16)__msa_fill_h(vec1[7]); - vec8 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); - vec9 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10); - vec10 = (v8u16)__msa_fill_h(vec2[3]); - vec11 = (v8u16)__msa_fill_h(vec2[7]); - vec12 = (v8u16)__msa_fill_h(vec3[3]); - vec13 = (v8u16)__msa_fill_h(vec3[7]); - vec10 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10); - vec11 = (v8u16)__msa_pckev_d((v2i64)vec13, (v2i64)vec12); - vec8 = const_256 - vec8; - vec9 = const_256 - vec9; - vec10 = const_256 - vec10; - vec11 = const_256 - vec11; - vec8 *= vec4; - vec9 *= vec5; - vec10 *= vec6; - vec11 *= vec7; - vec8 = (v8u16)__msa_srai_h((v8i16)vec8, 8); - vec9 = (v8u16)__msa_srai_h((v8i16)vec9, 8); - vec10 = (v8u16)__msa_srai_h((v8i16)vec10, 8); - vec11 = (v8u16)__msa_srai_h((v8i16)vec11, 8); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - dst2 = (v16u8)__msa_pckev_b((v16i8)vec9, (v16i8)vec8); - dst3 = (v16u8)__msa_pckev_b((v16i8)vec11, (v16i8)vec10); - dst0 = (v16u8)__msa_adds_u_b(dst0, dst2); - 
dst1 = (v16u8)__msa_adds_u_b(dst1, dst3); - dst0 = __msa_bmnz_v(dst0, const_255, mask); - dst1 = __msa_bmnz_v(dst1, const_255, mask); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - src_argb1 += 32; - dst_argb += 32; - } -} - -void ARGBQuantizeRow_MSA(uint8_t* dst_argb, - int scale, - int interval_size, - int interval_offset, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; - v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; - v4i32 vec_scale = __msa_fill_w(scale); - v16u8 vec_int_sz = (v16u8)__msa_fill_b(interval_size); - v16u8 vec_int_ofst = (v16u8)__msa_fill_b(interval_offset); - v16i8 mask = {0, 1, 2, 19, 4, 5, 6, 23, 8, 9, 10, 27, 12, 13, 14, 31}; - v16i8 zero = {0}; - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)dst_argb, 0); - src1 = (v16u8)__msa_ld_b((void*)dst_argb, 16); - src2 = (v16u8)__msa_ld_b((void*)dst_argb, 32); - src3 = (v16u8)__msa_ld_b((void*)dst_argb, 48); - vec0 = (v8i16)__msa_ilvr_b(zero, (v16i8)src0); - vec1 = (v8i16)__msa_ilvl_b(zero, (v16i8)src0); - vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); - vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); - vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); - vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2); - vec6 = (v8i16)__msa_ilvr_b(zero, (v16i8)src3); - vec7 = (v8i16)__msa_ilvl_b(zero, (v16i8)src3); - tmp0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); - tmp1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); - tmp2 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); - tmp3 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); - tmp4 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec2); - tmp5 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec2); - tmp6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec3); - tmp7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec3); - tmp8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec4); - tmp9 = 
(v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec4); - tmp10 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec5); - tmp11 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec5); - tmp12 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec6); - tmp13 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec6); - tmp14 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec7); - tmp15 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec7); - tmp0 *= vec_scale; - tmp1 *= vec_scale; - tmp2 *= vec_scale; - tmp3 *= vec_scale; - tmp4 *= vec_scale; - tmp5 *= vec_scale; - tmp6 *= vec_scale; - tmp7 *= vec_scale; - tmp8 *= vec_scale; - tmp9 *= vec_scale; - tmp10 *= vec_scale; - tmp11 *= vec_scale; - tmp12 *= vec_scale; - tmp13 *= vec_scale; - tmp14 *= vec_scale; - tmp15 *= vec_scale; - tmp0 >>= 16; - tmp1 >>= 16; - tmp2 >>= 16; - tmp3 >>= 16; - tmp4 >>= 16; - tmp5 >>= 16; - tmp6 >>= 16; - tmp7 >>= 16; - tmp8 >>= 16; - tmp9 >>= 16; - tmp10 >>= 16; - tmp11 >>= 16; - tmp12 >>= 16; - tmp13 >>= 16; - tmp14 >>= 16; - tmp15 >>= 16; - vec0 = (v8i16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec1 = (v8i16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); - vec2 = (v8i16)__msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); - vec3 = (v8i16)__msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); - vec4 = (v8i16)__msa_pckev_h((v8i16)tmp9, (v8i16)tmp8); - vec5 = (v8i16)__msa_pckev_h((v8i16)tmp11, (v8i16)tmp10); - vec6 = (v8i16)__msa_pckev_h((v8i16)tmp13, (v8i16)tmp12); - vec7 = (v8i16)__msa_pckev_h((v8i16)tmp15, (v8i16)tmp14); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - dst2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); - dst3 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); - dst0 *= vec_int_sz; - dst1 *= vec_int_sz; - dst2 *= vec_int_sz; - dst3 *= vec_int_sz; - dst0 += vec_int_ofst; - dst1 += vec_int_ofst; - dst2 += vec_int_ofst; - dst3 += vec_int_ofst; - dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)src0, (v16i8)dst0); - dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)dst1); - dst2 = 
(v16u8)__msa_vshf_b(mask, (v16i8)src2, (v16i8)dst2); - dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)src3, (v16i8)dst3); - ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); - dst_argb += 64; - } -} - -void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, - uint8_t* dst_argb, - const int8_t* matrix_argb, - int width) { - int32_t x; - v16i8 src0; - v16u8 src1, src2, dst0, dst1; - v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; - v8i16 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17; - v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; - v16i8 zero = {0}; - v8i16 max = __msa_ldi_h(255); - - src0 = __msa_ld_b((void*)matrix_argb, 0); - vec0 = (v8i16)__msa_ilvr_b(zero, src0); - vec1 = (v8i16)__msa_ilvl_b(zero, src0); - - for (x = 0; x < width; x += 8) { - src1 = (v16u8)__msa_ld_b((void*)src_argb, 0); - src2 = (v16u8)__msa_ld_b((void*)src_argb, 16); - vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); - vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); - vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); - vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2); - vec6 = (v8i16)__msa_pckod_d((v2i64)vec2, (v2i64)vec2); - vec7 = (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec3); - vec8 = (v8i16)__msa_pckod_d((v2i64)vec4, (v2i64)vec4); - vec9 = (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec5); - vec2 = (v8i16)__msa_pckev_d((v2i64)vec2, (v2i64)vec2); - vec3 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec3); - vec4 = (v8i16)__msa_pckev_d((v2i64)vec4, (v2i64)vec4); - vec5 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec5); - vec10 = vec2 * vec0; - vec11 = vec2 * vec1; - vec12 = vec6 * vec0; - vec13 = vec6 * vec1; - tmp0 = __msa_hadd_s_w(vec10, vec10); - tmp1 = __msa_hadd_s_w(vec11, vec11); - tmp2 = __msa_hadd_s_w(vec12, vec12); - tmp3 = __msa_hadd_s_w(vec13, vec13); - vec14 = vec3 * vec0; - vec15 = vec3 * vec1; - vec16 = vec7 * vec0; - vec17 = vec7 * vec1; - tmp4 = __msa_hadd_s_w(vec14, vec14); - tmp5 = __msa_hadd_s_w(vec15, 
vec15); - tmp6 = __msa_hadd_s_w(vec16, vec16); - tmp7 = __msa_hadd_s_w(vec17, vec17); - vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); - vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); - vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); - tmp0 = __msa_hadd_s_w(vec10, vec10); - tmp1 = __msa_hadd_s_w(vec11, vec11); - tmp2 = __msa_hadd_s_w(vec12, vec12); - tmp3 = __msa_hadd_s_w(vec13, vec13); - tmp0 = __msa_srai_w(tmp0, 6); - tmp1 = __msa_srai_w(tmp1, 6); - tmp2 = __msa_srai_w(tmp2, 6); - tmp3 = __msa_srai_w(tmp3, 6); - vec2 = vec4 * vec0; - vec6 = vec4 * vec1; - vec3 = vec8 * vec0; - vec7 = vec8 * vec1; - tmp8 = __msa_hadd_s_w(vec2, vec2); - tmp9 = __msa_hadd_s_w(vec6, vec6); - tmp10 = __msa_hadd_s_w(vec3, vec3); - tmp11 = __msa_hadd_s_w(vec7, vec7); - vec4 = vec5 * vec0; - vec8 = vec5 * vec1; - vec5 = vec9 * vec0; - vec9 = vec9 * vec1; - tmp12 = __msa_hadd_s_w(vec4, vec4); - tmp13 = __msa_hadd_s_w(vec8, vec8); - tmp14 = __msa_hadd_s_w(vec5, vec5); - tmp15 = __msa_hadd_s_w(vec9, vec9); - vec14 = __msa_pckev_h((v8i16)tmp9, (v8i16)tmp8); - vec15 = __msa_pckev_h((v8i16)tmp11, (v8i16)tmp10); - vec16 = __msa_pckev_h((v8i16)tmp13, (v8i16)tmp12); - vec17 = __msa_pckev_h((v8i16)tmp15, (v8i16)tmp14); - tmp4 = __msa_hadd_s_w(vec14, vec14); - tmp5 = __msa_hadd_s_w(vec15, vec15); - tmp6 = __msa_hadd_s_w(vec16, vec16); - tmp7 = __msa_hadd_s_w(vec17, vec17); - tmp4 = __msa_srai_w(tmp4, 6); - tmp5 = __msa_srai_w(tmp5, 6); - tmp6 = __msa_srai_w(tmp6, 6); - tmp7 = __msa_srai_w(tmp7, 6); - vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); - vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); - vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); - vec10 = __msa_maxi_s_h(vec10, 0); - vec11 = __msa_maxi_s_h(vec11, 0); - vec12 = __msa_maxi_s_h(vec12, 0); - vec13 = __msa_maxi_s_h(vec13, 0); - vec10 = __msa_min_s_h(vec10, max); - vec11 = __msa_min_s_h(vec11, max); - vec12 = __msa_min_s_h(vec12, 
max); - vec13 = __msa_min_s_h(vec13, max); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec11, (v16i8)vec10); - dst1 = (v16u8)__msa_pckev_b((v16i8)vec13, (v16i8)vec12); - ST_UB2(dst0, dst1, dst_argb, 16); - src_argb += 32; - dst_argb += 32; - } -} - -void SplitUVRow_MSA(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; - - for (x = 0; x < width; x += 32) { - src0 = (v16u8)__msa_ld_b((void*)src_uv, 0); - src1 = (v16u8)__msa_ld_b((void*)src_uv, 16); - src2 = (v16u8)__msa_ld_b((void*)src_uv, 32); - src3 = (v16u8)__msa_ld_b((void*)src_uv, 48); - dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - dst3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - ST_UB2(dst0, dst1, dst_u, 16); - ST_UB2(dst2, dst3, dst_v, 16); - src_uv += 64; - dst_u += 32; - dst_v += 32; - } -} - -void SetRow_MSA(uint8_t* dst, uint8_t v8, int width) { - int x; - v16u8 dst0 = (v16u8)__msa_fill_b(v8); - - for (x = 0; x < width; x += 16) { - ST_UB(dst0, dst); - dst += 16; - } -} - -void MirrorSplitUVRow_MSA(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - int x; - v16u8 src0, src1, src2, src3; - v16u8 dst0, dst1, dst2, dst3; - v16i8 mask0 = {30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0}; - v16i8 mask1 = {31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1}; - - src_uv += (2 * width); - - for (x = 0; x < width; x += 32) { - src_uv -= 64; - src2 = (v16u8)__msa_ld_b((void*)src_uv, 0); - src3 = (v16u8)__msa_ld_b((void*)src_uv, 16); - src0 = (v16u8)__msa_ld_b((void*)src_uv, 32); - src1 = (v16u8)__msa_ld_b((void*)src_uv, 48); - dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); - dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); - dst3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src3, 
(v16i8)src2); - ST_UB2(dst0, dst1, dst_v, 16); - ST_UB2(dst2, dst3, dst_u, 16); - dst_u += 32; - dst_v += 32; - } -} - -void SobelXRow_MSA(const uint8_t* src_y0, - const uint8_t* src_y1, - const uint8_t* src_y2, - uint8_t* dst_sobelx, - int32_t width) { - int x; - v16u8 src0, src1, src2, src3, src4, src5, dst0; - v8i16 vec0, vec1, vec2, vec3, vec4, vec5; - v16i8 mask0 = {0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9}; - v16i8 tmp = __msa_ldi_b(8); - v16i8 mask1 = mask0 + tmp; - v8i16 zero = {0}; - v8i16 max = __msa_ldi_h(255); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_y0, 0); - src1 = (v16u8)__msa_ld_b((void*)src_y0, 16); - src2 = (v16u8)__msa_ld_b((void*)src_y1, 0); - src3 = (v16u8)__msa_ld_b((void*)src_y1, 16); - src4 = (v16u8)__msa_ld_b((void*)src_y2, 0); - src5 = (v16u8)__msa_ld_b((void*)src_y2, 16); - vec0 = (v8i16)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); - vec1 = (v8i16)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); - vec2 = (v8i16)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); - vec3 = (v8i16)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); - vec4 = (v8i16)__msa_vshf_b(mask0, (v16i8)src5, (v16i8)src4); - vec5 = (v8i16)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); - vec0 = (v8i16)__msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); - vec1 = (v8i16)__msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); - vec2 = (v8i16)__msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); - vec3 = (v8i16)__msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); - vec4 = (v8i16)__msa_hsub_u_h((v16u8)vec4, (v16u8)vec4); - vec5 = (v8i16)__msa_hsub_u_h((v16u8)vec5, (v16u8)vec5); - vec0 += vec2; - vec1 += vec3; - vec4 += vec2; - vec5 += vec3; - vec0 += vec4; - vec1 += vec5; - vec0 = __msa_add_a_h(zero, vec0); - vec1 = __msa_add_a_h(zero, vec1); - vec0 = __msa_maxi_s_h(vec0, 0); - vec1 = __msa_maxi_s_h(vec1, 0); - vec0 = __msa_min_s_h(max, vec0); - vec1 = __msa_min_s_h(max, vec1); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_sobelx); - src_y0 += 16; - src_y1 += 16; 
- src_y2 += 16; - dst_sobelx += 16; - } -} - -void SobelYRow_MSA(const uint8_t* src_y0, - const uint8_t* src_y1, - uint8_t* dst_sobely, - int32_t width) { - int x; - v16u8 src0, src1, dst0; - v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; - v8i16 zero = {0}; - v8i16 max = __msa_ldi_h(255); - - for (x = 0; x < width; x += 16) { - src0 = (v16u8)__msa_ld_b((void*)src_y0, 0); - src1 = (v16u8)__msa_ld_b((void*)src_y1, 0); - vec0 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src0); - vec1 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src0); - vec2 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); - vec3 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src1); - vec0 -= vec2; - vec1 -= vec3; - vec6[0] = src_y0[16] - src_y1[16]; - vec6[1] = src_y0[17] - src_y1[17]; - vec2 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 2); - vec3 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 2); - vec4 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 4); - vec5 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 4); - vec0 += vec2; - vec1 += vec3; - vec4 += vec2; - vec5 += vec3; - vec0 += vec4; - vec1 += vec5; - vec0 = __msa_add_a_h(zero, vec0); - vec1 = __msa_add_a_h(zero, vec1); - vec0 = __msa_maxi_s_h(vec0, 0); - vec1 = __msa_maxi_s_h(vec1, 0); - vec0 = __msa_min_s_h(max, vec0); - vec1 = __msa_min_s_h(max, vec1); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst_sobely); - src_y0 += 16; - src_y1 += 16; - dst_sobely += 16; - } -} - -void HalfFloatRow_MSA(const uint16_t* src, - uint16_t* dst, - float scale, - int width) { - int i; - v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3; - v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v4f32 fvec0, fvec1, fvec2, fvec3, fvec4, fvec5, fvec6, fvec7; - v4f32 mult_vec; - v8i16 zero = {0}; - mult_vec[0] = 1.9259299444e-34f * scale; - mult_vec = (v4f32)__msa_splati_w((v4i32)mult_vec, 0); - - for (i = 0; i < width; i += 32) { - src0 = (v8u16)__msa_ld_h((void*)src, 0); - src1 = (v8u16)__msa_ld_h((void*)src, 16); - src2 = 
(v8u16)__msa_ld_h((void*)src, 32); - src3 = (v8u16)__msa_ld_h((void*)src, 48); - vec0 = (v4u32)__msa_ilvr_h(zero, (v8i16)src0); - vec1 = (v4u32)__msa_ilvl_h(zero, (v8i16)src0); - vec2 = (v4u32)__msa_ilvr_h(zero, (v8i16)src1); - vec3 = (v4u32)__msa_ilvl_h(zero, (v8i16)src1); - vec4 = (v4u32)__msa_ilvr_h(zero, (v8i16)src2); - vec5 = (v4u32)__msa_ilvl_h(zero, (v8i16)src2); - vec6 = (v4u32)__msa_ilvr_h(zero, (v8i16)src3); - vec7 = (v4u32)__msa_ilvl_h(zero, (v8i16)src3); - fvec0 = __msa_ffint_u_w(vec0); - fvec1 = __msa_ffint_u_w(vec1); - fvec2 = __msa_ffint_u_w(vec2); - fvec3 = __msa_ffint_u_w(vec3); - fvec4 = __msa_ffint_u_w(vec4); - fvec5 = __msa_ffint_u_w(vec5); - fvec6 = __msa_ffint_u_w(vec6); - fvec7 = __msa_ffint_u_w(vec7); - fvec0 *= mult_vec; - fvec1 *= mult_vec; - fvec2 *= mult_vec; - fvec3 *= mult_vec; - fvec4 *= mult_vec; - fvec5 *= mult_vec; - fvec6 *= mult_vec; - fvec7 *= mult_vec; - vec0 = ((v4u32)fvec0) >> 13; - vec1 = ((v4u32)fvec1) >> 13; - vec2 = ((v4u32)fvec2) >> 13; - vec3 = ((v4u32)fvec3) >> 13; - vec4 = ((v4u32)fvec4) >> 13; - vec5 = ((v4u32)fvec5) >> 13; - vec6 = ((v4u32)fvec6) >> 13; - vec7 = ((v4u32)fvec7) >> 13; - dst0 = (v8u16)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); - dst1 = (v8u16)__msa_pckev_h((v8i16)vec3, (v8i16)vec2); - dst2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); - dst3 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); - ST_UH2(dst0, dst1, dst, 8); - ST_UH2(dst2, dst3, dst + 16, 8); - src += 32; - dst += 32; - } -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/source/scale.cc b/source/scale.cc index 76379fd6e..9c1e9b264 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -112,21 +112,6 @@ static void ScalePlaneDown2(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN2_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleRowDown2 = - filtering == kFilterNone - ? ScaleRowDown2_Any_MSA - : (filtering == kFilterLinear ? 
ScaleRowDown2Linear_Any_MSA - : ScaleRowDown2Box_Any_MSA); - if (IS_ALIGNED(dst_width, 32)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA - : (filtering == kFilterLinear - ? ScaleRowDown2Linear_MSA - : ScaleRowDown2Box_MSA); - } - } -#endif #if defined(HAS_SCALEROWDOWN2_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleRowDown2 = @@ -317,15 +302,6 @@ static void ScalePlaneDown4(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN4_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleRowDown4 = - filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA; - if (IS_ALIGNED(dst_width, 16)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA; - } - } -#endif #if defined(HAS_SCALEROWDOWN4_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleRowDown4 = @@ -445,27 +421,6 @@ static void ScalePlaneDown34(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN34_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - if (dst_width % 48 == 0) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_MSA; - ScaleRowDown34_1 = ScaleRowDown34_MSA; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA; - } - } else { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_Any_MSA; - ScaleRowDown34_1 = ScaleRowDown34_Any_MSA; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA; - } - } - } -#endif #if defined(HAS_SCALEROWDOWN34_LSX) if (TestCpuFlag(kCpuHasLSX)) { if (dst_width % 48 == 0) { @@ -694,26 +649,6 @@ static void ScalePlaneDown38(int src_width, } } #endif -#if defined(HAS_SCALEROWDOWN38_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_Any_MSA; - ScaleRowDown38_2 = ScaleRowDown38_Any_MSA; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA; - } - if (dst_width % 12 == 0) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_MSA; - ScaleRowDown38_2 = 
ScaleRowDown38_MSA; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA; - } - } - } -#endif #if defined(HAS_SCALEROWDOWN38_LSX) if (TestCpuFlag(kCpuHasLSX)) { if (!filtering) { @@ -1015,14 +950,6 @@ static int ScalePlaneBox(int src_width, } } #endif -#if defined(HAS_SCALEADDROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleAddRow = ScaleAddRow_Any_MSA; - if (IS_ALIGNED(src_width, 16)) { - ScaleAddRow = ScaleAddRow_MSA; - } - } -#endif #if defined(HAS_SCALEADDROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleAddRow = ScaleAddRow_Any_LSX; @@ -1178,14 +1105,6 @@ static int ScalePlaneBilinearDown(int src_width, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(src_width, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -1213,14 +1132,6 @@ static int ScalePlaneBilinearDown(int src_width, } } #endif -#if defined(HAS_SCALEFILTERCOLS_MSA) - if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) { - ScaleFilterCols = ScaleFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 16)) { - ScaleFilterCols = ScaleFilterCols_MSA; - } - } -#endif #if defined(HAS_SCALEFILTERCOLS_LSX) if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_Any_LSX; @@ -1430,14 +1341,6 @@ static int ScalePlaneBilinearUp(int src_width, } } #endif -#if defined(HAS_SCALEFILTERCOLS_MSA) - if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) { - ScaleFilterCols = ScaleFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 16)) { - ScaleFilterCols = ScaleFilterCols_MSA; - } - } -#endif #if defined(HAS_SCALEFILTERCOLS_LSX) if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_Any_LSX; diff --git a/source/scale_any.cc b/source/scale_any.cc index 
14334467c..c380bebbc 100644 --- a/source/scale_any.cc +++ b/source/scale_any.cc @@ -152,22 +152,6 @@ SDANY(ScaleUVRowDown2Box_Any_NEON, 2, 7) #endif - -#ifdef HAS_SCALEROWDOWN2_MSA -SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) -SDANY(ScaleRowDown2Linear_Any_MSA, - ScaleRowDown2Linear_MSA, - ScaleRowDown2Linear_C, - 2, - 1, - 31) -SDANY(ScaleRowDown2Box_Any_MSA, - ScaleRowDown2Box_MSA, - ScaleRowDown2Box_C, - 2, - 1, - 31) -#endif #ifdef HAS_SCALEROWDOWN2_LSX SDANY(ScaleRowDown2_Any_LSX, ScaleRowDown2_LSX, ScaleRowDown2_C, 2, 1, 31) SDANY(ScaleRowDown2Linear_Any_LSX, @@ -210,15 +194,6 @@ SDANY(ScaleRowDown4Box_Any_NEON, 1, 7) #endif -#ifdef HAS_SCALEROWDOWN4_MSA -SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) -SDANY(ScaleRowDown4Box_Any_MSA, - ScaleRowDown4Box_MSA, - ScaleRowDown4Box_C, - 4, - 1, - 15) -#endif #ifdef HAS_SCALEROWDOWN4_LSX SDANY(ScaleRowDown4_Any_LSX, ScaleRowDown4_LSX, ScaleRowDown4_C, 4, 1, 15) SDANY(ScaleRowDown4Box_Any_LSX, @@ -289,26 +264,6 @@ SDANY(ScaleRowDown34_1_Box_Any_NEON, 23) #endif #endif -#ifdef HAS_SCALEROWDOWN34_MSA -SDANY(ScaleRowDown34_Any_MSA, - ScaleRowDown34_MSA, - ScaleRowDown34_C, - 4 / 3, - 1, - 47) -SDANY(ScaleRowDown34_0_Box_Any_MSA, - ScaleRowDown34_0_Box_MSA, - ScaleRowDown34_0_Box_C, - 4 / 3, - 1, - 47) -SDANY(ScaleRowDown34_1_Box_Any_MSA, - ScaleRowDown34_1_Box_MSA, - ScaleRowDown34_1_Box_C, - 4 / 3, - 1, - 47) -#endif #ifdef HAS_SCALEROWDOWN34_LSX SDANY(ScaleRowDown34_Any_LSX, ScaleRowDown34_LSX, @@ -369,26 +324,6 @@ SDANY(ScaleRowDown38_2_Box_Any_NEON, 1, 11) #endif -#ifdef HAS_SCALEROWDOWN38_MSA -SDANY(ScaleRowDown38_Any_MSA, - ScaleRowDown38_MSA, - ScaleRowDown38_C, - 8 / 3, - 1, - 11) -SDANY(ScaleRowDown38_3_Box_Any_MSA, - ScaleRowDown38_3_Box_MSA, - ScaleRowDown38_3_Box_C, - 8 / 3, - 1, - 11) -SDANY(ScaleRowDown38_2_Box_Any_MSA, - ScaleRowDown38_2_Box_MSA, - ScaleRowDown38_2_Box_C, - 8 / 3, - 1, - 11) -#endif #ifdef HAS_SCALEROWDOWN38_LSX 
SDANY(ScaleRowDown38_Any_LSX, ScaleRowDown38_LSX, @@ -450,26 +385,6 @@ SDANY(ScaleARGBRowDown2Box_Any_NEON, 4, 7) #endif -#ifdef HAS_SCALEARGBROWDOWN2_MSA -SDANY(ScaleARGBRowDown2_Any_MSA, - ScaleARGBRowDown2_MSA, - ScaleARGBRowDown2_C, - 2, - 4, - 3) -SDANY(ScaleARGBRowDown2Linear_Any_MSA, - ScaleARGBRowDown2Linear_MSA, - ScaleARGBRowDown2Linear_C, - 2, - 4, - 3) -SDANY(ScaleARGBRowDown2Box_Any_MSA, - ScaleARGBRowDown2Box_MSA, - ScaleARGBRowDown2Box_C, - 2, - 4, - 3) -#endif #ifdef HAS_SCALEARGBROWDOWN2_LSX SDANY(ScaleARGBRowDown2_Any_LSX, ScaleARGBRowDown2_LSX, @@ -529,18 +444,6 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, 4, 3) #endif -#ifdef HAS_SCALEARGBROWDOWNEVEN_MSA -SDAANY(ScaleARGBRowDownEven_Any_MSA, - ScaleARGBRowDownEven_MSA, - ScaleARGBRowDownEven_C, - 4, - 3) -SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, - ScaleARGBRowDownEvenBox_MSA, - ScaleARGBRowDownEvenBox_C, - 4, - 3) -#endif #ifdef HAS_SCALEARGBROWDOWNEVEN_LSX SDAANY(ScaleARGBRowDownEven_Any_LSX, ScaleARGBRowDownEven_LSX, @@ -590,9 +493,6 @@ SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31) #ifdef HAS_SCALEADDROW_NEON SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15) #endif -#ifdef HAS_SCALEADDROW_MSA -SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15) -#endif #ifdef HAS_SCALEADDROW_LSX SAROW(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, 1, 2, 15) #endif @@ -619,9 +519,6 @@ SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) #ifdef HAS_SCALEADDROW_NEON SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) #endif -#ifdef HAS_SCALEADDROW_MSA -SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) -#endif #ifdef HAS_SCALEADDROW_LSX SAANY(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, ScaleAddRow_C, 15) #endif @@ -644,18 +541,12 @@ SAANY(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, ScaleAddRow_C, 15) #ifdef HAS_SCALEFILTERCOLS_NEON CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) #endif -#ifdef HAS_SCALEFILTERCOLS_MSA -CANY(ScaleFilterCols_Any_MSA, 
ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) -#endif #ifdef HAS_SCALEFILTERCOLS_LSX CANY(ScaleFilterCols_Any_LSX, ScaleFilterCols_LSX, ScaleFilterCols_C, 1, 15) #endif #ifdef HAS_SCALEARGBCOLS_NEON CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) #endif -#ifdef HAS_SCALEARGBCOLS_MSA -CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) -#endif #ifdef HAS_SCALEARGBCOLS_LSX CANY(ScaleARGBCols_Any_LSX, ScaleARGBCols_LSX, ScaleARGBCols_C, 4, 3) #endif @@ -666,13 +557,6 @@ CANY(ScaleARGBFilterCols_Any_NEON, 4, 3) #endif -#ifdef HAS_SCALEARGBFILTERCOLS_MSA -CANY(ScaleARGBFilterCols_Any_MSA, - ScaleARGBFilterCols_MSA, - ScaleARGBFilterCols_C, - 4, - 7) -#endif #ifdef HAS_SCALEARGBFILTERCOLS_LSX CANY(ScaleARGBFilterCols_Any_LSX, ScaleARGBFilterCols_LSX, diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 5fa58fd0f..506409c15 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -107,22 +107,6 @@ static void ScaleARGBDown2(int src_width, : ScaleARGBRowDown2Box_SME; } #endif -#if defined(HAS_SCALEARGBROWDOWN2_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleARGBRowDown2 = - filtering == kFilterNone - ? ScaleARGBRowDown2_Any_MSA - : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA - : ScaleARGBRowDown2Box_Any_MSA); - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDown2 = - filtering == kFilterNone - ? ScaleARGBRowDown2_MSA - : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA - : ScaleARGBRowDown2Box_MSA); - } - } -#endif #if defined(HAS_SCALEARGBROWDOWN2_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleARGBRowDown2 = @@ -282,16 +266,6 @@ static void ScaleARGBDownEven(int src_width, } } #endif -#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA - : ScaleARGBRowDownEven_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = - filtering ? 
ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA; - } - } -#endif #if defined(HAS_SCALEARGBROWDOWNEVEN_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_LSX @@ -386,14 +360,6 @@ static int ScaleARGBBilinearDown(int src_width, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(clip_src_width, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -420,14 +386,6 @@ static int ScaleARGBBilinearDown(int src_width, } } #endif -#if defined(HAS_SCALEARGBFILTERCOLS_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; - } - } -#endif #if defined(HAS_SCALEARGBFILTERCOLS_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX; @@ -525,14 +483,6 @@ static int ScaleARGBBilinearUp(int src_width, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -563,14 +513,6 @@ static int ScaleARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_SCALEARGBFILTERCOLS_MSA) - if (filtering && TestCpuFlag(kCpuHasMSA)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; - } - } -#endif #if defined(HAS_SCALEARGBFILTERCOLS_LSX) if (filtering && TestCpuFlag(kCpuHasLSX)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX; @@ -597,14 +539,6 @@ static int 
ScaleARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_SCALEARGBCOLS_MSA) - if (!filtering && TestCpuFlag(kCpuHasMSA)) { - ScaleARGBFilterCols = ScaleARGBCols_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBFilterCols = ScaleARGBCols_MSA; - } - } -#endif #if defined(HAS_SCALEARGBCOLS_LSX) if (!filtering && TestCpuFlag(kCpuHasLSX)) { ScaleARGBFilterCols = ScaleARGBCols_Any_LSX; @@ -747,14 +681,6 @@ static int ScaleYUVToARGBBilinearUp(int src_width, I422ToARGBRow = I422ToARGBRow_SME; } #endif -#if defined(HAS_I422TOARGBROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - I422ToARGBRow = I422ToARGBRow_Any_MSA; - if (IS_ALIGNED(src_width, 8)) { - I422ToARGBRow = I422ToARGBRow_MSA; - } - } -#endif #if defined(HAS_I422TOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { I422ToARGBRow = I422ToARGBRow_Any_LSX; @@ -809,14 +735,6 @@ static int ScaleYUVToARGBBilinearUp(int src_width, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -851,14 +769,6 @@ static int ScaleYUVToARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_SCALEARGBFILTERCOLS_MSA) - if (filtering && TestCpuFlag(kCpuHasMSA)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; - } - } -#endif #if defined(HAS_SCALEARGBFILTERCOLS_LSX) if (filtering && TestCpuFlag(kCpuHasLSX)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX; @@ -885,14 +795,6 @@ static int ScaleYUVToARGBBilinearUp(int src_width, } } #endif -#if defined(HAS_SCALEARGBCOLS_MSA) - if (!filtering && TestCpuFlag(kCpuHasMSA)) { - ScaleARGBFilterCols = ScaleARGBCols_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBFilterCols = ScaleARGBCols_MSA; - } - } 
-#endif #if defined(HAS_SCALEARGBCOLS_LSX) if (!filtering && TestCpuFlag(kCpuHasLSX)) { ScaleARGBFilterCols = ScaleARGBCols_Any_LSX; @@ -1025,14 +927,6 @@ static void ScaleARGBSimple(int src_width, } } #endif -#if defined(HAS_SCALEARGBCOLS_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleARGBCols = ScaleARGBCols_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBCols = ScaleARGBCols_MSA; - } - } -#endif #if defined(HAS_SCALEARGBCOLS_LSX) if (TestCpuFlag(kCpuHasLSX)) { ScaleARGBCols = ScaleARGBCols_Any_LSX; diff --git a/source/scale_common.cc b/source/scale_common.cc index 430afc232..e51af8d7a 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -1667,14 +1667,6 @@ void ScalePlaneVertical(int src_height, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(dst_width_bytes, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; diff --git a/source/scale_msa.cc b/source/scale_msa.cc deleted file mode 100644 index 482a521f0..000000000 --- a/source/scale_msa.cc +++ /dev/null @@ -1,949 +0,0 @@ -/* - * Copyright 2016 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -#include "libyuv/scale_row.h" - -// This module is for GCC MSA -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#include "libyuv/macros_msa.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define LOAD_INDEXED_DATA(srcp, indx0, out0) \ - { \ - out0[0] = srcp[indx0[0]]; \ - out0[1] = srcp[indx0[1]]; \ - out0[2] = srcp[indx0[2]]; \ - out0[3] = srcp[indx0[3]]; \ - } - -void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - int x; - v16u8 src0, src1, dst0; - (void)src_stride; - - for (x = 0; x < dst_width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); - dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); - ST_UB(dst0, dst_argb); - src_argb += 32; - dst_argb += 16; - } -} - -void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - int x; - v16u8 src0, src1, vec0, vec1, dst0; - (void)src_stride; - - for (x = 0; x < dst_width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); - vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); - vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); - dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1); - ST_UB(dst0, dst_argb); - src_argb += 32; - dst_argb += 16; - } -} - -void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - int x; - const uint8_t* s = src_argb; - const uint8_t* t = src_argb + src_stride; - v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; - v8u16 reg0, reg1, reg2, reg3; - v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; - - for (x = 0; x < dst_width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); - src3 = 
(v16u8)__msa_ld_b((v16i8*)t, 16); - vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0); - vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); - vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2); - vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3); - reg0 = __msa_hadd_u_h(vec0, vec0); - reg1 = __msa_hadd_u_h(vec1, vec1); - reg2 = __msa_hadd_u_h(vec2, vec2); - reg3 = __msa_hadd_u_h(vec3, vec3); - reg0 += reg2; - reg1 += reg3; - reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2); - reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); - ST_UB(dst0, dst_argb); - s += 32; - t += 32; - dst_argb += 16; - } -} - -void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - int32_t src_stepx, - uint8_t* dst_argb, - int dst_width) { - int x; - int32_t stepx = src_stepx * 4; - int32_t data0, data1, data2, data3; - (void)src_stride; - - for (x = 0; x < dst_width; x += 4) { - data0 = LW(src_argb); - data1 = LW(src_argb + stepx); - data2 = LW(src_argb + stepx * 2); - data3 = LW(src_argb + stepx * 3); - SW(data0, dst_argb); - SW(data1, dst_argb + 4); - SW(data2, dst_argb + 8); - SW(data3, dst_argb + 12); - src_argb += stepx * 4; - dst_argb += 16; - } -} - -void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_argb, - int dst_width) { - int x; - const uint8_t* nxt_argb = src_argb + src_stride; - int32_t stepx = src_stepx * 4; - int64_t data0, data1, data2, data3; - v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0}; - v16u8 vec0, vec1, vec2, vec3; - v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; - v16u8 dst0; - - for (x = 0; x < dst_width; x += 4) { - data0 = LD(src_argb); - data1 = LD(src_argb + stepx); - data2 = LD(src_argb + stepx * 2); - data3 = LD(src_argb + stepx * 3); - src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0); - src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1); - src1 = 
(v16u8)__msa_insert_d((v2i64)src1, 0, data2); - src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3); - data0 = LD(nxt_argb); - data1 = LD(nxt_argb + stepx); - data2 = LD(nxt_argb + stepx * 2); - data3 = LD(nxt_argb + stepx * 3); - src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0); - src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1); - src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2); - src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3); - vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); - vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); - vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); - vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); - reg0 = __msa_hadd_u_h(vec0, vec0); - reg1 = __msa_hadd_u_h(vec1, vec1); - reg2 = __msa_hadd_u_h(vec2, vec2); - reg3 = __msa_hadd_u_h(vec3, vec3); - reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0); - reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1); - reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0); - reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1); - reg4 += reg6; - reg5 += reg7; - reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2); - reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); - ST_UB(dst0, dst_argb); - src_argb += stepx * 4; - nxt_argb += stepx * 4; - dst_argb += 16; - } -} - -void ScaleRowDown2_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x; - v16u8 src0, src1, src2, src3, dst0, dst1; - (void)src_stride; - - for (x = 0; x < dst_width; x += 32) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); - dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - ST_UB2(dst0, dst1, dst, 16); - src_ptr += 64; - dst += 32; - } -} - -void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, - 
ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x; - v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0, dst1; - (void)src_stride; - - for (x = 0; x < dst_width; x += 32) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); - vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - vec2 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); - vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); - dst0 = __msa_aver_u_b(vec1, vec0); - dst1 = __msa_aver_u_b(vec3, vec2); - ST_UB2(dst0, dst1, dst, 16); - src_ptr += 64; - dst += 32; - } -} - -void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x; - const uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + src_stride; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1; - v8u16 vec0, vec1, vec2, vec3; - - for (x = 0; x < dst_width; x += 32) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); - src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); - src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); - src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); - src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); - vec0 = __msa_hadd_u_h(src0, src0); - vec1 = __msa_hadd_u_h(src1, src1); - vec2 = __msa_hadd_u_h(src2, src2); - vec3 = __msa_hadd_u_h(src3, src3); - vec0 += __msa_hadd_u_h(src4, src4); - vec1 += __msa_hadd_u_h(src5, src5); - vec2 += __msa_hadd_u_h(src6, src6); - vec3 += __msa_hadd_u_h(src7, src7); - vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 2); - vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 2); - vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 2); - vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 2); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - dst1 = 
(v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); - ST_UB2(dst0, dst1, dst, 16); - s += 64; - t += 64; - dst += 32; - } -} - -void ScaleRowDown4_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x; - v16u8 src0, src1, src2, src3, vec0, vec1, dst0; - (void)src_stride; - - for (x = 0; x < dst_width; x += 16) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); - vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); - dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst); - src_ptr += 64; - dst += 16; - } -} - -void ScaleRowDown4Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x; - const uint8_t* s = src_ptr; - const uint8_t* t0 = s + src_stride; - const uint8_t* t1 = s + src_stride * 2; - const uint8_t* t2 = s + src_stride * 3; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0; - v8u16 vec0, vec1, vec2, vec3; - v4u32 reg0, reg1, reg2, reg3; - - for (x = 0; x < dst_width; x += 16) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); - src4 = (v16u8)__msa_ld_b((v16i8*)t0, 0); - src5 = (v16u8)__msa_ld_b((v16i8*)t0, 16); - src6 = (v16u8)__msa_ld_b((v16i8*)t0, 32); - src7 = (v16u8)__msa_ld_b((v16i8*)t0, 48); - vec0 = __msa_hadd_u_h(src0, src0); - vec1 = __msa_hadd_u_h(src1, src1); - vec2 = __msa_hadd_u_h(src2, src2); - vec3 = __msa_hadd_u_h(src3, src3); - vec0 += __msa_hadd_u_h(src4, src4); - vec1 += __msa_hadd_u_h(src5, src5); - vec2 += __msa_hadd_u_h(src6, src6); - vec3 += __msa_hadd_u_h(src7, src7); - src0 = (v16u8)__msa_ld_b((v16i8*)t1, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)t1, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)t1, 32); - src3 
= (v16u8)__msa_ld_b((v16i8*)t1, 48); - src4 = (v16u8)__msa_ld_b((v16i8*)t2, 0); - src5 = (v16u8)__msa_ld_b((v16i8*)t2, 16); - src6 = (v16u8)__msa_ld_b((v16i8*)t2, 32); - src7 = (v16u8)__msa_ld_b((v16i8*)t2, 48); - vec0 += __msa_hadd_u_h(src0, src0); - vec1 += __msa_hadd_u_h(src1, src1); - vec2 += __msa_hadd_u_h(src2, src2); - vec3 += __msa_hadd_u_h(src3, src3); - vec0 += __msa_hadd_u_h(src4, src4); - vec1 += __msa_hadd_u_h(src5, src5); - vec2 += __msa_hadd_u_h(src6, src6); - vec3 += __msa_hadd_u_h(src7, src7); - reg0 = __msa_hadd_u_w(vec0, vec0); - reg1 = __msa_hadd_u_w(vec1, vec1); - reg2 = __msa_hadd_u_w(vec2, vec2); - reg3 = __msa_hadd_u_w(vec3, vec3); - reg0 = (v4u32)__msa_srari_w((v4i32)reg0, 4); - reg1 = (v4u32)__msa_srari_w((v4i32)reg1, 4); - reg2 = (v4u32)__msa_srari_w((v4i32)reg2, 4); - reg3 = (v4u32)__msa_srari_w((v4i32)reg3, 4); - vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); - vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); - dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); - ST_UB(dst0, dst); - s += 64; - t0 += 64; - t1 += 64; - t2 += 64; - dst += 16; - } -} - -void ScaleRowDown38_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x, width; - uint64_t dst0; - uint32_t dst1; - v16u8 src0, src1, vec0; - v16i8 mask = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; - (void)src_stride; - - assert(dst_width % 3 == 0); - width = dst_width / 3; - - for (x = 0; x < width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); - vec0 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)src0); - dst0 = __msa_copy_u_d((v2i64)vec0, 0); - dst1 = __msa_copy_u_w((v4i32)vec0, 2); - SD(dst0, dst); - SW(dst1, dst + 8); - src_ptr += 32; - dst += 12; - } -} - -void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width) { - int x, width; - const uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + 
src_stride; - uint64_t dst0; - uint32_t dst1; - v16u8 src0, src1, src2, src3, out; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v4u32 tmp0, tmp1, tmp2, tmp3, tmp4; - v8i16 zero = {0}; - v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9}; - v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0}; - v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA); - v4u32 const_0x4000 = (v4u32)__msa_fill_w(0x4000); - - assert((dst_width % 3 == 0) && (dst_width > 0)); - width = dst_width / 3; - - for (x = 0; x < width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); - src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); - vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); - vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); - vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); - vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); - vec4 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec0); - vec5 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec1); - vec6 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec2); - vec7 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec3); - vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); - vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2); - vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); - tmp0 = __msa_hadd_u_w(vec4, vec4); - tmp1 = __msa_hadd_u_w(vec5, vec5); - tmp2 = __msa_hadd_u_w(vec6, vec6); - tmp3 = __msa_hadd_u_w(vec7, vec7); - tmp4 = __msa_hadd_u_w(vec0, vec0); - vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); - tmp0 = __msa_hadd_u_w(vec0, vec0); - tmp1 = __msa_hadd_u_w(vec1, vec1); - tmp0 *= const_0x2AAA; - tmp1 *= const_0x2AAA; - tmp4 *= const_0x4000; - tmp0 = 
(v4u32)__msa_srai_w((v4i32)tmp0, 16); - tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16); - tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16); - vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4); - out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0); - dst0 = __msa_copy_u_d((v2i64)out, 0); - dst1 = __msa_copy_u_w((v4i32)out, 2); - SD(dst0, dst_ptr); - SW(dst1, dst_ptr + 8); - s += 32; - t += 32; - dst_ptr += 12; - } -} - -void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst_ptr, - int dst_width) { - int x, width; - const uint8_t* s = src_ptr; - const uint8_t* t0 = s + src_stride; - const uint8_t* t1 = s + src_stride * 2; - uint64_t dst0; - uint32_t dst1; - v16u8 src0, src1, src2, src3, src4, src5, out; - v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; - v4u32 tmp0, tmp1, tmp2, tmp3, tmp4; - v8u16 zero = {0}; - v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9}; - v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0}; - v4u32 const_0x1C71 = (v4u32)__msa_fill_w(0x1C71); - v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA); - - assert((dst_width % 3 == 0) && (dst_width > 0)); - width = dst_width / 3; - - for (x = 0; x < width; x += 4) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)t0, 0); - src3 = (v16u8)__msa_ld_b((v16i8*)t0, 16); - src4 = (v16u8)__msa_ld_b((v16i8*)t1, 0); - src5 = (v16u8)__msa_ld_b((v16i8*)t1, 16); - vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); - vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); - vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); - vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); - vec4 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src4); - vec5 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src4); - vec6 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src5); - vec7 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src5); - vec0 = 
__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); - vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); - vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); - vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); - vec0 += __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); - vec1 += __msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); - vec2 += __msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); - vec3 += __msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); - vec4 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec0); - vec5 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec1); - vec6 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec2); - vec7 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec3); - vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); - vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2); - vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); - tmp0 = __msa_hadd_u_w(vec4, vec4); - tmp1 = __msa_hadd_u_w(vec5, vec5); - tmp2 = __msa_hadd_u_w(vec6, vec6); - tmp3 = __msa_hadd_u_w(vec7, vec7); - tmp4 = __msa_hadd_u_w(vec0, vec0); - vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); - tmp0 = __msa_hadd_u_w(vec0, vec0); - tmp1 = __msa_hadd_u_w(vec1, vec1); - tmp0 *= const_0x1C71; - tmp1 *= const_0x1C71; - tmp4 *= const_0x2AAA; - tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16); - tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16); - tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16); - vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4); - out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0); - dst0 = __msa_copy_u_d((v2i64)out, 0); - dst1 = __msa_copy_u_w((v4i32)out, 2); - SD(dst0, dst_ptr); - SW(dst1, dst_ptr + 8); - s += 32; - t0 += 32; - t1 += 32; - dst_ptr += 12; - } -} - -void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { - int x; - v16u8 src0; - v8u16 dst0, dst1; - v16i8 zero = {0}; - - assert(src_width > 0); - - for (x = 0; x < src_width; x += 16) { - src0 = 
LD_UB(src_ptr); - dst0 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 0); - dst1 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 16); - dst0 += (v8u16)__msa_ilvr_b(zero, (v16i8)src0); - dst1 += (v8u16)__msa_ilvl_b(zero, (v16i8)src0); - ST_UH2(dst0, dst1, dst_ptr, 8); - src_ptr += 16; - dst_ptr += 16; - } -} - -void ScaleFilterCols_MSA(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx) { - int j; - v4i32 vec_x = __msa_fill_w(x); - v4i32 vec_dx = __msa_fill_w(dx); - v4i32 vec_const = {0, 1, 2, 3}; - v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; - v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - v8u16 reg0, reg1; - v16u8 dst0; - v4i32 const_0xFFFF = __msa_fill_w(0xFFFF); - v4i32 const_0x40 = __msa_fill_w(0x40); - - vec0 = vec_dx * vec_const; - vec1 = vec_dx * 4; - vec_x += vec0; - - for (j = 0; j < dst_width - 1; j += 16) { - vec2 = vec_x >> 16; - vec6 = vec_x & const_0xFFFF; - vec_x += vec1; - vec3 = vec_x >> 16; - vec7 = vec_x & const_0xFFFF; - vec_x += vec1; - vec4 = vec_x >> 16; - vec8 = vec_x & const_0xFFFF; - vec_x += vec1; - vec5 = vec_x >> 16; - vec9 = vec_x & const_0xFFFF; - vec_x += vec1; - vec6 >>= 9; - vec7 >>= 9; - vec8 >>= 9; - vec9 >>= 9; - LOAD_INDEXED_DATA(src_ptr, vec2, tmp0); - LOAD_INDEXED_DATA(src_ptr, vec3, tmp1); - LOAD_INDEXED_DATA(src_ptr, vec4, tmp2); - LOAD_INDEXED_DATA(src_ptr, vec5, tmp3); - vec2 += 1; - vec3 += 1; - vec4 += 1; - vec5 += 1; - LOAD_INDEXED_DATA(src_ptr, vec2, tmp4); - LOAD_INDEXED_DATA(src_ptr, vec3, tmp5); - LOAD_INDEXED_DATA(src_ptr, vec4, tmp6); - LOAD_INDEXED_DATA(src_ptr, vec5, tmp7); - tmp4 -= tmp0; - tmp5 -= tmp1; - tmp6 -= tmp2; - tmp7 -= tmp3; - tmp4 *= vec6; - tmp5 *= vec7; - tmp6 *= vec8; - tmp7 *= vec9; - tmp4 += const_0x40; - tmp5 += const_0x40; - tmp6 += const_0x40; - tmp7 += const_0x40; - tmp4 >>= 7; - tmp5 >>= 7; - tmp6 >>= 7; - tmp7 >>= 7; - tmp0 += tmp4; - tmp1 += tmp5; - tmp2 += tmp6; - tmp3 += tmp7; - reg0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); - reg1 = 
(v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); - __msa_st_b(dst0, dst_ptr, 0); - dst_ptr += 16; - } -} - -void ScaleARGBCols_MSA(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx) { - const uint32_t* src = (const uint32_t*)(src_argb); - uint32_t* dst = (uint32_t*)(dst_argb); - int j; - v4i32 x_vec = __msa_fill_w(x); - v4i32 dx_vec = __msa_fill_w(dx); - v4i32 const_vec = {0, 1, 2, 3}; - v4i32 vec0, vec1, vec2; - v4i32 dst0; - - vec0 = dx_vec * const_vec; - vec1 = dx_vec * 4; - x_vec += vec0; - - for (j = 0; j < dst_width; j += 4) { - vec2 = x_vec >> 16; - x_vec += vec1; - LOAD_INDEXED_DATA(src, vec2, dst0); - __msa_st_w(dst0, dst, 0); - dst += 4; - } -} - -void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx) { - const uint32_t* src = (const uint32_t*)(src_argb); - int j; - v4u32 src0, src1, src2, src3; - v4u32 vec0, vec1, vec2, vec3; - v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; - v16u8 mult0, mult1, mult2, mult3; - v8u16 tmp0, tmp1, tmp2, tmp3; - v16u8 dst0, dst1; - v4u32 vec_x = (v4u32)__msa_fill_w(x); - v4u32 vec_dx = (v4u32)__msa_fill_w(dx); - v4u32 vec_const = {0, 1, 2, 3}; - v16u8 const_0x7f = (v16u8)__msa_fill_b(0x7f); - - vec0 = vec_dx * vec_const; - vec1 = vec_dx * 4; - vec_x += vec0; - - for (j = 0; j < dst_width - 1; j += 8) { - vec2 = vec_x >> 16; - reg0 = (v16u8)(vec_x >> 9); - vec_x += vec1; - vec3 = vec_x >> 16; - reg1 = (v16u8)(vec_x >> 9); - vec_x += vec1; - reg0 = reg0 & const_0x7f; - reg1 = reg1 & const_0x7f; - reg0 = (v16u8)__msa_shf_b((v16i8)reg0, 0); - reg1 = (v16u8)__msa_shf_b((v16i8)reg1, 0); - reg2 = reg0 ^ const_0x7f; - reg3 = reg1 ^ const_0x7f; - mult0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)reg2); - mult1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)reg2); - mult2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)reg3); - mult3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)reg3); - 
LOAD_INDEXED_DATA(src, vec2, src0); - LOAD_INDEXED_DATA(src, vec3, src1); - vec2 += 1; - vec3 += 1; - LOAD_INDEXED_DATA(src, vec2, src2); - LOAD_INDEXED_DATA(src, vec3, src3); - reg4 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); - reg5 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); - reg6 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); - reg7 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); - tmp0 = __msa_dotp_u_h(reg4, mult0); - tmp1 = __msa_dotp_u_h(reg5, mult1); - tmp2 = __msa_dotp_u_h(reg6, mult2); - tmp3 = __msa_dotp_u_h(reg7, mult3); - tmp0 >>= 7; - tmp1 >>= 7; - tmp2 >>= 7; - tmp3 >>= 7; - dst0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); - dst1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); - __msa_st_b(dst0, dst_argb, 0); - __msa_st_b(dst1, dst_argb, 16); - dst_argb += 32; - } -} - -void ScaleRowDown34_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - int x; - (void)src_stride; - v16u8 src0, src1, src2, src3; - v16u8 vec0, vec1, vec2; - v16i8 mask0 = {0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20}; - v16i8 mask1 = {5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25}; - v16i8 mask2 = {11, 12, 13, 15, 16, 17, 19, 20, - 21, 23, 24, 25, 27, 28, 29, 31}; - - assert((dst_width % 3 == 0) && (dst_width > 0)); - - for (x = 0; x < dst_width; x += 48) { - src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); - vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); - vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src2, (v16i8)src1); - vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src2); - __msa_st_b((v16i8)vec0, dst, 0); - __msa_st_b((v16i8)vec1, dst, 16); - __msa_st_b((v16i8)vec2, dst, 32); - src_ptr += 64; - dst += 48; - } -} - -void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* d, - int dst_width) { - const 
uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + src_stride; - int x; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5; - v16u8 vec6, vec7, vec8, vec9, vec10, vec11; - v8i16 reg0, reg1, reg2, reg3, reg4, reg5; - v8i16 reg6, reg7, reg8, reg9, reg10, reg11; - v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1}; - v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1}; - v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3}; - v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; - v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15, - 16, 17, 17, 18, 18, 19, 20, 21}; - v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; - v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1}; - v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2}; - v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2}; - - assert((dst_width % 3 == 0) && (dst_width > 0)); - - for (x = 0; x < dst_width; x += 48) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); - src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); - src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); - src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); - src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); - vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0); - vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); - vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1); - vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2); - vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); - vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3); - vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4); - vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); - vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5); - vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6); - vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, 
(v16i8)src6); - vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7); - reg0 = (v8i16)__msa_dotp_u_h(vec0, const0); - reg1 = (v8i16)__msa_dotp_u_h(vec1, const1); - reg2 = (v8i16)__msa_dotp_u_h(vec2, const2); - reg3 = (v8i16)__msa_dotp_u_h(vec3, const0); - reg4 = (v8i16)__msa_dotp_u_h(vec4, const1); - reg5 = (v8i16)__msa_dotp_u_h(vec5, const2); - reg6 = (v8i16)__msa_dotp_u_h(vec6, const0); - reg7 = (v8i16)__msa_dotp_u_h(vec7, const1); - reg8 = (v8i16)__msa_dotp_u_h(vec8, const2); - reg9 = (v8i16)__msa_dotp_u_h(vec9, const0); - reg10 = (v8i16)__msa_dotp_u_h(vec10, const1); - reg11 = (v8i16)__msa_dotp_u_h(vec11, const2); - reg0 = __msa_srar_h(reg0, shft0); - reg1 = __msa_srar_h(reg1, shft1); - reg2 = __msa_srar_h(reg2, shft2); - reg3 = __msa_srar_h(reg3, shft0); - reg4 = __msa_srar_h(reg4, shft1); - reg5 = __msa_srar_h(reg5, shft2); - reg6 = __msa_srar_h(reg6, shft0); - reg7 = __msa_srar_h(reg7, shft1); - reg8 = __msa_srar_h(reg8, shft2); - reg9 = __msa_srar_h(reg9, shft0); - reg10 = __msa_srar_h(reg10, shft1); - reg11 = __msa_srar_h(reg11, shft2); - reg0 = reg0 * 3 + reg6; - reg1 = reg1 * 3 + reg7; - reg2 = reg2 * 3 + reg8; - reg3 = reg3 * 3 + reg9; - reg4 = reg4 * 3 + reg10; - reg5 = reg5 * 3 + reg11; - reg0 = __msa_srari_h(reg0, 2); - reg1 = __msa_srari_h(reg1, 2); - reg2 = __msa_srari_h(reg2, 2); - reg3 = __msa_srari_h(reg3, 2); - reg4 = __msa_srari_h(reg4, 2); - reg5 = __msa_srari_h(reg5, 2); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); - dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); - dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); - __msa_st_b((v16i8)dst0, d, 0); - __msa_st_b((v16i8)dst1, d, 16); - __msa_st_b((v16i8)dst2, d, 32); - s += 64; - t += 64; - d += 48; - } -} - -void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* d, - int dst_width) { - const uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + src_stride; - int x; - v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, 
dst1, dst2; - v16u8 vec0, vec1, vec2, vec3, vec4, vec5; - v16u8 vec6, vec7, vec8, vec9, vec10, vec11; - v8i16 reg0, reg1, reg2, reg3, reg4, reg5; - v8i16 reg6, reg7, reg8, reg9, reg10, reg11; - v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1}; - v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1}; - v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3}; - v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; - v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15, - 16, 17, 17, 18, 18, 19, 20, 21}; - v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; - v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1}; - v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2}; - v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2}; - - assert((dst_width % 3 == 0) && (dst_width > 0)); - - for (x = 0; x < dst_width; x += 48) { - src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); - src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); - src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); - src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); - src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); - src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); - src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); - src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); - vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0); - vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); - vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1); - vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2); - vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); - vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3); - vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4); - vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); - vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5); - vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6); - vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6); - vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7); - reg0 = (v8i16)__msa_dotp_u_h(vec0, const0); - reg1 = 
(v8i16)__msa_dotp_u_h(vec1, const1); - reg2 = (v8i16)__msa_dotp_u_h(vec2, const2); - reg3 = (v8i16)__msa_dotp_u_h(vec3, const0); - reg4 = (v8i16)__msa_dotp_u_h(vec4, const1); - reg5 = (v8i16)__msa_dotp_u_h(vec5, const2); - reg6 = (v8i16)__msa_dotp_u_h(vec6, const0); - reg7 = (v8i16)__msa_dotp_u_h(vec7, const1); - reg8 = (v8i16)__msa_dotp_u_h(vec8, const2); - reg9 = (v8i16)__msa_dotp_u_h(vec9, const0); - reg10 = (v8i16)__msa_dotp_u_h(vec10, const1); - reg11 = (v8i16)__msa_dotp_u_h(vec11, const2); - reg0 = __msa_srar_h(reg0, shft0); - reg1 = __msa_srar_h(reg1, shft1); - reg2 = __msa_srar_h(reg2, shft2); - reg3 = __msa_srar_h(reg3, shft0); - reg4 = __msa_srar_h(reg4, shft1); - reg5 = __msa_srar_h(reg5, shft2); - reg6 = __msa_srar_h(reg6, shft0); - reg7 = __msa_srar_h(reg7, shft1); - reg8 = __msa_srar_h(reg8, shft2); - reg9 = __msa_srar_h(reg9, shft0); - reg10 = __msa_srar_h(reg10, shft1); - reg11 = __msa_srar_h(reg11, shft2); - reg0 += reg6; - reg1 += reg7; - reg2 += reg8; - reg3 += reg9; - reg4 += reg10; - reg5 += reg11; - reg0 = __msa_srari_h(reg0, 1); - reg1 = __msa_srari_h(reg1, 1); - reg2 = __msa_srari_h(reg2, 1); - reg3 = __msa_srari_h(reg3, 1); - reg4 = __msa_srari_h(reg4, 1); - reg5 = __msa_srari_h(reg5, 1); - dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); - dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); - dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); - __msa_st_b((v16i8)dst0, d, 0); - __msa_st_b((v16i8)dst1, d, 16); - __msa_st_b((v16i8)dst2, d, 32); - s += 64; - t += 64; - d += 48; - } -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 700d1b2b6..3d41a2398 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -155,23 +155,6 @@ static void ScaleUVDown2(int src_width, } #endif -#if defined(HAS_SCALEUVROWDOWN2_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleUVRowDown2 = - filtering == 
kFilterNone - ? ScaleUVRowDown2_Any_MSA - : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MSA - : ScaleUVRowDown2Box_Any_MSA); - if (IS_ALIGNED(dst_width, 2)) { - ScaleUVRowDown2 = - filtering == kFilterNone - ? ScaleUVRowDown2_MSA - : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MSA - : ScaleUVRowDown2Box_MSA); - } - } -#endif - if (filtering == kFilterLinear) { src_stride = 0; } @@ -321,16 +304,6 @@ static void ScaleUVDownEven(int src_width, } } #endif -#if defined(HAS_SCALEUVROWDOWNEVEN_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleUVRowDownEven = - filtering ? ScaleUVRowDownEvenBox_Any_MSA : ScaleUVRowDownEven_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { - ScaleUVRowDownEven = - filtering ? ScaleUVRowDownEvenBox_MSA : ScaleUVRowDownEven_MSA; - } - } -#endif #if defined(HAS_SCALEUVROWDOWNEVEN_RVV) || defined(HAS_SCALEUVROWDOWN4_RVV) if (TestCpuFlag(kCpuHasRVV) && !filtering) { #if defined(HAS_SCALEUVROWDOWNEVEN_RVV) @@ -419,14 +392,6 @@ static int ScaleUVBilinearDown(int src_width, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(clip_src_width, 32)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -452,14 +417,6 @@ static int ScaleUVBilinearDown(int src_width, ScaleUVFilterCols = ScaleUVFilterCols_NEON; } } -#endif -#if defined(HAS_SCALEUVFILTERCOLS_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - ScaleUVFilterCols = ScaleUVFilterCols_MSA; - } - } #endif // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row of UV. 
@@ -545,14 +502,6 @@ static int ScaleUVBilinearUp(int src_width, InterpolateRow = InterpolateRow_SME; } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; - if (IS_ALIGNED(dst_width, 16)) { - InterpolateRow = InterpolateRow_MSA; - } - } -#endif #if defined(HAS_INTERPOLATEROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { InterpolateRow = InterpolateRow_Any_LSX; @@ -582,14 +531,6 @@ static int ScaleUVBilinearUp(int src_width, } } #endif -#if defined(HAS_SCALEUVFILTERCOLS_MSA) - if (filtering && TestCpuFlag(kCpuHasMSA)) { - ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA; - if (IS_ALIGNED(dst_width, 16)) { - ScaleUVFilterCols = ScaleUVFilterCols_MSA; - } - } -#endif #if defined(HAS_SCALEUVCOLS_SSSE3) if (!filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleUVFilterCols = ScaleUVCols_SSSE3; @@ -602,14 +543,6 @@ static int ScaleUVBilinearUp(int src_width, ScaleUVFilterCols = ScaleUVCols_NEON; } } -#endif -#if defined(HAS_SCALEUVCOLS_MSA) - if (!filtering && TestCpuFlag(kCpuHasMSA)) { - ScaleUVFilterCols = ScaleUVCols_Any_MSA; - if (IS_ALIGNED(dst_width, 8)) { - ScaleUVFilterCols = ScaleUVCols_MSA; - } - } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleUVFilterCols = ScaleUVColsUp2_C; @@ -944,14 +877,6 @@ static void ScaleUVSimple(int src_width, ScaleUVCols = ScaleUVCols_NEON; } } -#endif -#if defined(HAS_SCALEUVCOLS_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ScaleUVCols = ScaleUVCols_Any_MSA; - if (IS_ALIGNED(dst_width, 4)) { - ScaleUVCols = ScaleUVCols_MSA; - } - } #endif if (src_width * 2 == dst_width && x < 0x8000) { ScaleUVCols = ScaleUVColsUp2_C; diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index d597b6f0b..8feaeb9e6 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -151,15 +151,6 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { } #endif // defined(__riscv) -#if defined(__mips__) - int has_mips = TestCpuFlag(kCpuHasMIPS); - if (has_mips) { - int 
has_msa = TestCpuFlag(kCpuHasMSA); - printf("Has MIPS 0x%x\n", has_mips); - printf("Has MSA 0x%x\n", has_msa); - } -#endif // defined(__mips__) - #if defined(__loongarch__) int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH); if (has_loongarch) { @@ -291,18 +282,6 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) { #ifdef __llvm__ printf("__llvm__ %d\n", __llvm__); #endif -#ifdef __mips_msa - printf("__mips_msa %d\n", __mips_msa); -#endif -#ifdef __mips - printf("__mips %d\n", __mips); -#endif -#ifdef __mips_isa_rev - printf("__mips_isa_rev %d\n", __mips_isa_rev); -#endif -#ifdef _MIPS_ARCH_LOONGSON3A - printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A); -#endif #ifdef __loongarch__ printf("__loongarch__ %d\n", __loongarch__); #endif @@ -388,19 +367,6 @@ TEST_F(LibYUVBaseTest, TestLinuxAArch64) { } #endif -TEST_F(LibYUVBaseTest, DISABLED_TestLinuxMipsMsa) { - if (FileExists("../../unit_test/testdata/mips.txt")) { - printf("Note: testing to load \"../../unit_test/testdata/mips.txt\"\n"); - - EXPECT_EQ(0, MipsCpuCaps("../../unit_test/testdata/mips.txt")); - EXPECT_EQ(kCpuHasMSA, MipsCpuCaps("../../unit_test/testdata/mips_msa.txt")); - EXPECT_EQ(kCpuHasMSA, - MipsCpuCaps("../../unit_test/testdata/mips_loongson2k.txt")); - } else { - printf("WARNING: unable to load \"../../unit_test/testdata/mips.txt\"\n"); - } -} - TEST_F(LibYUVBaseTest, DISABLED_TestLinuxRVV) { if (FileExists("../../unit_test/testdata/riscv64.txt")) { printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n"); diff --git a/unit_test/testdata/mips_msa.txt b/unit_test/testdata/mips_msa.txt deleted file mode 100644 index ac9306152..000000000 --- a/unit_test/testdata/mips_msa.txt +++ /dev/null @@ -1,7 +0,0 @@ -system type : generic-loongson-machine -machine : loongson,generic -processor : 0 - -isa : mips1 mips2 mips3 mips4 mips5 mips32r1 mips32r2 mips64r1 mips64r2 -ASEs implemented : vz msa -shadow register sets : 1 diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc index 
d917f5343..163e3ffdb 100644 --- a/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -90,17 +90,10 @@ static int TestCpuEnv(int cpu_info) { cpu_info &= ~libyuv::kCpuHasSME; } #endif -#if defined(__mips__) && defined(__linux__) - if (TestEnv("LIBYUV_DISABLE_MSA")) { - cpu_info &= ~libyuv::kCpuHasMSA; - } -#endif #if defined(__longarch__) && defined(__linux__) if (TestEnv("LIBYUV_DISABLE_LSX")) { cpu_info &= ~libyuv::kCpuHasLSX; } -#endif -#if defined(__longarch__) && defined(__linux__) if (TestEnv("LIBYUV_DISABLE_LASX")) { cpu_info &= ~libyuv::kCpuHasLASX; } diff --git a/util/cpuid.c b/util/cpuid.c index dda87b84b..68bdbb142 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -113,15 +113,6 @@ int main(int argc, const char* argv[]) { } #endif // defined(__riscv) -#if defined(__mips__) - int has_mips = TestCpuFlag(kCpuHasMIPS); - if (has_mips) { - int has_msa = TestCpuFlag(kCpuHasMSA); - printf("Has MIPS 0x%x\n", has_mips); - printf("Has MSA 0x%x\n", has_msa); - } -#endif // defined(__mips__) - #if defined(__loongarch__) int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH); if (has_loongarch) {