Remove Mips DSPR2 code

Bug: libyuv:765
Test: build for mips still passes
Change-Id: I99105ad3951d2210c0793e3b9241c178442fdc37
Reviewed-on: https://chromium-review.googlesource.com/826404
Reviewed-by: Weiyong Yao <braveyao@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Frank Barchard 2017-12-13 17:38:52 -08:00 committed by Commit Bot
parent bb3180ae80
commit 3b81288ece
33 changed files with 13 additions and 3810 deletions

View File

@ -24,14 +24,12 @@ cc_library {
"source/rotate_any.cc",
"source/rotate_argb.cc",
"source/rotate_common.cc",
"source/rotate_dspr2.cc",
"source/rotate_gcc.cc",
"source/rotate_msa.cc",
"source/rotate_neon.cc",
"source/rotate_neon64.cc",
"source/row_any.cc",
"source/row_common.cc",
"source/row_dspr2.cc",
"source/row_gcc.cc",
"source/row_msa.cc",
"source/row_neon.cc",
@ -40,7 +38,6 @@ cc_library {
"source/scale_any.cc",
"source/scale_argb.cc",
"source/scale_common.cc",
"source/scale_dspr2.cc",
"source/scale_gcc.cc",
"source/scale_msa.cc",
"source/scale_neon.cc",

View File

@ -24,14 +24,12 @@ LOCAL_SRC_FILES := \
source/rotate_any.cc \
source/rotate_argb.cc \
source/rotate_common.cc \
source/rotate_dspr2.cc \
source/rotate_gcc.cc \
source/rotate_msa.cc \
source/rotate_neon.cc \
source/rotate_neon64.cc \
source/row_any.cc \
source/row_common.cc \
source/row_dspr2.cc \
source/row_gcc.cc \
source/row_msa.cc \
source/row_neon.cc \
@ -40,7 +38,6 @@ LOCAL_SRC_FILES := \
source/scale_any.cc \
source/scale_argb.cc \
source/scale_common.cc \
source/scale_dspr2.cc \
source/scale_gcc.cc \
source/scale_msa.cc \
source/scale_neon.cc \

View File

@ -110,19 +110,16 @@ static_library("libyuv_internal") {
"source/rotate_any.cc",
"source/rotate_argb.cc",
"source/rotate_common.cc",
"source/rotate_dspr2.cc",
"source/rotate_gcc.cc",
"source/rotate_win.cc",
"source/row_any.cc",
"source/row_common.cc",
"source/row_dspr2.cc",
"source/row_gcc.cc",
"source/row_win.cc",
"source/scale.cc",
"source/scale_any.cc",
"source/scale_argb.cc",
"source/scale_common.cc",
"source/scale_dspr2.cc",
"source/scale_gcc.cc",
"source/scale_win.cc",
"source/video_common.cc",
@ -302,7 +299,6 @@ if (libyuv_include_tests) {
# Enable the following 3 macros to turn off assembly for specified CPU.
# "LIBYUV_DISABLE_X86",
# "LIBYUV_DISABLE_NEON",
# "LIBYUV_DISABLE_DSPR2",
# Enable the following macro to build libyuv as a shared library (dll).
# "LIBYUV_USING_SHARED_LIBRARY"
]

View File

@ -1,12 +1,12 @@
**libyuv** is an open source project that includes YUV scaling and conversion functionality.
* Scale YUV to prepare content for compression, with point, bilinear or box filter.
* Convert to YUV from webcam formats.
* Convert from YUV to formats for rendering/effects.
* Convert to YUV from webcam formats for compression.
* Convert to RGB formats for rendering/effects.
* Rotate by 90/180/270 degrees to adjust for mobile devices in portrait mode.
* Optimized for SSE2/SSSE3/AVX2 on x86/x64.
* Optimized for SSSE3/AVX2 on x86/x64.
* Optimized for Neon on Arm.
* Optimized for DSP R2 on Mips.
* Optimized for MSA on Mips.
### Development

View File

@ -17,7 +17,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_AVX512BW
LIBYUV_DISABLE_ERMS
LIBYUV_DISABLE_FMA3
LIBYUV_DISABLE_DSPR2
LIBYUV_DISABLE_MSA
LIBYUV_DISABLE_NEON
# Test Width/Height/Repeat
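
The LIBYUV_DISABLE_ variables above are read once, when the CPU flags are first initialized, so an instruction set can be disabled at runtime as well as at compile time. A minimal sketch, assuming the variable is set before libyuv's first TestCpuFlag() call (the getenv() check appears in the cpu_id.cc hunk further down):

```cpp
// Sketch: force libyuv to skip its MSA paths for one run.
// Assumes setenv() happens before the first TestCpuFlag(), because
// GetCpuFlags() reads getenv("LIBYUV_DISABLE_MSA") only during auto-init.
#include <cstdlib>
#include "libyuv/cpu_id.h"

int main() {
  setenv("LIBYUV_DISABLE_MSA", "1", 1 /* overwrite */);
  // MSA now reports as unavailable, so the MSA fast paths are not selected.
  return libyuv::TestCpuFlag(libyuv::kCpuHasMSA);  // expected: 0
}
```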

View File

@ -129,15 +129,10 @@ ia32
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
mipsel
mips
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mipsel\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=false"
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true"
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true is_clang=true"
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest

View File

@ -47,8 +47,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x200000;
static const int kCpuHasDSPR2 = 0x400000;
static const int kCpuHasMSA = 0x800000;
static const int kCpuHasMSA = 0x400000;
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
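
With kCpuHasDSPR2 gone, kCpuHasMSA is the only remaining MIPS flag and moves down to 0x400000. A minimal sketch of the query that the row/scale/rotate dispatch in this change relies on, using the documented auto-init behavior of TestCpuFlag():

```cpp
#include "libyuv/cpu_id.h"

// Returns true when the MSA-optimized paths may be selected.
// TestCpuFlag() lazily initializes the cached cpu_info on first use.
bool HasMsa() {
  return libyuv::TestCpuFlag(libyuv::kCpuHasMSA) != 0;
}
```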

View File

@ -54,12 +54,6 @@ extern "C" {
#define HAS_TRANSPOSEUVWX8_NEON
#endif
#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \
defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSEWX8_DSPR2
#define HAS_TRANSPOSEUVWX8_DSPR2
#endif // defined(__mips__)
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_TRANSPOSEWX16_MSA
#define HAS_TRANSPOSEUVWX16_MSA
@ -97,16 +91,6 @@ void TransposeWx8_Fast_SSSE3(const uint8* src,
uint8* dst,
int dst_stride,
int width);
void TransposeWx8_DSPR2(const uint8* src,
int src_stride,
uint8* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_DSPR2(const uint8* src,
int src_stride,
uint8* dst,
int dst_stride,
int width);
void TransposeWx16_MSA(const uint8* src,
int src_stride,
uint8* dst,
@ -128,11 +112,6 @@ void TransposeWx8_Fast_Any_SSSE3(const uint8* src,
uint8* dst,
int dst_stride,
int width);
void TransposeWx8_Any_DSPR2(const uint8* src,
int src_stride,
uint8* dst,
int dst_stride,
int width);
void TransposeWx16_Any_MSA(const uint8* src,
int src_stride,
uint8* dst,
@ -176,13 +155,6 @@ void TransposeUVWx8_NEON(const uint8* src,
uint8* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_DSPR2(const uint8* src,
int src_stride,
uint8* dst_a,
int dst_stride_a,
uint8* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_MSA(const uint8* src,
int src_stride,
uint8* dst_a,
@ -205,13 +177,6 @@ void TransposeUVWx8_Any_NEON(const uint8* src,
uint8* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_Any_DSPR2(const uint8* src,
int src_stride,
uint8* dst_a,
int dst_stride_a,
uint8* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_MSA(const uint8* src,
int src_stride,
uint8* dst_a,

View File

@ -380,37 +380,6 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_SCALESUMSAMPLES_NEON
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \
(_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
#define HAS_COPYROW_MIPS
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_I422TOARGBROW_DSPR2
#define HAS_INTERPOLATEROW_DSPR2
#define HAS_MIRRORROW_DSPR2
#define HAS_MIRRORUVROW_DSPR2
#define HAS_SPLITUVROW_DSPR2
#define HAS_RGB24TOARGBROW_DSPR2
#define HAS_RAWTOARGBROW_DSPR2
#define HAS_RGB565TOARGBROW_DSPR2
#define HAS_ARGB1555TOARGBROW_DSPR2
#define HAS_ARGB4444TOARGBROW_DSPR2
#define HAS_I444TOARGBROW_DSPR2
#define HAS_I422TOARGB4444ROW_DSPR2
#define HAS_I422TOARGB1555ROW_DSPR2
#define HAS_NV12TOARGBROW_DSPR2
#define HAS_BGRATOUVROW_DSPR2
#define HAS_BGRATOYROW_DSPR2
#define HAS_ABGRTOUVROW_DSPR2
#define HAS_ARGBTOYROW_DSPR2
#define HAS_ABGRTOYROW_DSPR2
#define HAS_RGBATOUVROW_DSPR2
#define HAS_RGBATOYROW_DSPR2
#define HAS_ARGBTOUVROW_DSPR2
#endif
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_ABGRTOUVROW_MSA
#define HAS_ABGRTOYROW_MSA
@ -797,29 +766,6 @@ void I444ToARGBRow_MSA(const uint8* src_y,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MSA(const uint8* src_y,
const uint8* src_u,
@ -1021,30 +967,6 @@ void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width);
void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width);
void BGRAToUVRow_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
void ABGRToUVRow_DSPR2(const uint8* src_abgr,
int src_stride_abgr,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToUVRow_DSPR2(const uint8* src_rgba,
int src_stride_rgba,
uint8* dst_u,
uint8* dst_v,
int width);
void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGBToUVRow_DSPR2(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
@ -1073,10 +995,6 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555,
uint8* dst_y,
int width);
void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_y,
int width);
@ -1263,26 +1181,6 @@ void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
uint8* dst_v,
int width);
void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr,
int src_stride_abgr,
uint8* dst_u,
uint8* dst_v,
int width);
void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba,
int src_stride_rgba,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_Any_DSPR2(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
@ -1361,7 +1259,6 @@ void ARGBToUV444Row_C(const uint8* src_argb,
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
@ -1378,10 +1275,6 @@ void MirrorUVRow_NEON(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
void MirrorUVRow_DSPR2(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
void MirrorUVRow_MSA(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
@ -1411,10 +1304,6 @@ void SplitUVRow_NEON(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
void SplitUVRow_DSPR2(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
void SplitUVRow_MSA(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
void SplitUVRow_Any_SSE2(const uint8* src_uv,
uint8* dst_u,
@ -1428,10 +1317,6 @@ void SplitUVRow_Any_NEON(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
void SplitUVRow_Any_DSPR2(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
int width);
void SplitUVRow_Any_MSA(const uint8* src_uv,
uint8* dst_u,
uint8* dst_v,
@ -1707,15 +1592,6 @@ void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width);
void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width);
void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width);
@ -1773,19 +1649,6 @@ void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555,
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24,
uint8* dst_argb,
int width);
void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565,
uint8* dst_argb,
int width);
void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444,
uint8* dst_argb,
@ -2543,53 +2406,6 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
@ -3088,11 +2904,6 @@ void InterpolateRow_NEON(uint8* dst_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_DSPR2(uint8* dst_ptr,
const uint8* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_MSA(uint8* dst_ptr,
const uint8* src_ptr,
ptrdiff_t src_stride_ptr,
@ -3113,11 +2924,6 @@ void InterpolateRow_Any_AVX2(uint8* dst_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_Any_DSPR2(uint8* dst_ptr,
const uint8* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_Any_MSA(uint8* dst_ptr,
const uint8* src_ptr,
ptrdiff_t src_stride_ptr,

View File

@ -94,16 +94,6 @@ extern "C" {
#define HAS_SCALEARGBFILTERCOLS_NEON
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_DSPR2) && !defined(__native_client__) && \
defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_SCALEROWDOWN2_DSPR2
#define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2
#define HAS_SCALEADDROW_DSPR2
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_SCALEADDROW_MSA
#define HAS_SCALEARGBCOLS_MSA
@ -831,51 +821,6 @@ void ScaleFilterCols_Any_NEON(uint8* dst_ptr,
int x,
int dx);
void ScaleRowDown2_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width);
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width);
void ScaleRowDown4_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width);
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width);
void ScaleRowDown34_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width);
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* d,
int dst_width);
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* d,
int dst_width);
void ScaleRowDown38_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width);
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
uint16* dst_ptr,
int src_width);
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,

View File

@ -121,7 +121,6 @@
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
# 'LIBYUV_DISABLE_DSPR2',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
# TODO(fbarchard): Make these into gyp defines.

View File

@ -55,7 +55,6 @@
'source/rotate_argb.cc',
'source/rotate_common.cc',
'source/rotate_gcc.cc',
'source/rotate_dspr2.cc',
'source/rotate_msa.cc',
'source/rotate_neon.cc',
'source/rotate_neon64.cc',
@ -63,7 +62,6 @@
'source/row_any.cc',
'source/row_common.cc',
'source/row_gcc.cc',
'source/row_dspr2.cc',
'source/row_msa.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
@ -73,7 +71,6 @@
'source/scale_argb.cc',
'source/scale_common.cc',
'source/scale_gcc.cc',
'source/scale_dspr2.cc',
'source/scale_msa.cc',
'source/scale_neon.cc',
'source/scale_neon64.cc',

View File

@ -100,7 +100,6 @@
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
# 'LIBYUV_DISABLE_DSPR2',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
],

View File

@ -32,14 +32,12 @@ LOCAL_OBJ_FILES := \
source/rotate.o \
source/rotate_common.o \
source/rotate_gcc.o \
source/rotate_dspr2.o \
source/rotate_neon64.o \
source/rotate_neon.o \
source/rotate_win.o \
source/row_any.o \
source/row_common.o \
source/row_gcc.o \
source/row_dspr2.o \
source/row_neon64.o \
source/row_neon.o \
source/row_win.o \
@ -48,7 +46,6 @@ LOCAL_OBJ_FILES := \
source/scale.o \
source/scale_common.o \
source/scale_gcc.o \
source/scale_dspr2.o \
source/scale_neon64.o \
source/scale_neon.o \
source/scale_win.o \

View File

@ -212,11 +212,6 @@ static void CopyPlane2(const uint8* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Copy plane
for (y = 0; y < height - 1; y += 2) {
@ -579,14 +574,6 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@ -595,14 +582,6 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVRow = ARGBToUVRow_Any_MSA;
@ -680,22 +659,6 @@ int BGRAToI420(const uint8* src_bgra,
}
}
#endif
#if defined(HAS_BGRATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToYRow = BGRAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_BGRATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_BGRATOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
BGRAToYRow = BGRAToYRow_Any_MSA;
@ -781,22 +744,6 @@ int ABGRToI420(const uint8* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToYRow = ABGRToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYRow = ABGRToYRow_Any_MSA;
@ -882,22 +829,6 @@ int RGBAToI420(const uint8* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToYRow = RGBAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_RGBATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_RGBATOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGBAToYRow = RGBAToYRow_Any_MSA;
@ -1287,14 +1218,6 @@ int RGB565ToI420(const uint8* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
#endif
{
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))

View File

@ -97,15 +97,6 @@ static int I420ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@ -292,15 +283,6 @@ static int I422ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@ -769,14 +751,6 @@ static int I444ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_I444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I444ToARGBRow = I444ToARGBRow_Any_MSA;
@ -905,15 +879,6 @@ static int I420AlphaToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2;
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA;
@ -1262,14 +1227,6 @@ int RGB24ToARGB(const uint8* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA;
@ -1329,14 +1286,6 @@ int RAWToARGB(const uint8* src_raw,
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RAWToARGBRow = RAWToARGBRow_Any_MSA;
@ -1404,14 +1353,6 @@ int RGB565ToARGB(const uint8* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA;
@ -1479,14 +1420,6 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA;
@ -1554,14 +1487,6 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
@ -1626,14 +1551,6 @@ static int NV12ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_NV12TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
@ -1823,14 +1740,6 @@ int M420ToARGB(const uint8* src_m420,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_NV12TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MSA;

View File

@ -484,15 +484,6 @@ static int I420ToRGBAMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TORGBAROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
I422ToRGBARow = I422ToRGBARow_DSPR2;
}
#endif
#if defined(HAS_I422TORGBAROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToRGBARow = I422ToRGBARow_Any_MSA;
@ -744,14 +735,6 @@ int I420ToARGB1555(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
@ -825,14 +808,6 @@ int I420ToARGB4444(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
@ -1057,14 +1032,6 @@ int I420ToRGB565Dither(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;

View File

@ -100,14 +100,6 @@ int ARGBToI444(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@ -197,22 +189,6 @@ int ARGBToI422(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
@ -344,22 +320,6 @@ int ARGBToNV12(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
@ -495,22 +455,6 @@ int ARGBToNV21(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
@ -643,22 +587,6 @@ int ARGBToYUY2(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
@ -787,22 +715,6 @@ int ARGBToUYVY(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
@ -880,14 +792,6 @@ int ARGBToI400(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;

View File

@ -179,7 +179,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
if (strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
return kCpuHasDSPR2;
return 0;
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
@ -189,7 +189,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
if (strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
return kCpuHasDSPR2;
return 0;
}
}
}
@ -290,16 +290,10 @@ static SAFEBUFFERS int GetCpuFlags(void) {
#endif
#if defined(__mips__) && defined(__linux__)
#if defined(__mips_dspr2)
cpu_info |= kCpuHasDSPR2;
#endif
#if defined(__mips_msa)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
#endif
cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_DSPR2")) {
cpu_info &= ~kCpuHasDSPR2;
}
if (getenv("LIBYUV_DISABLE_MSA")) {
cpu_info &= ~kCpuHasMSA;
}

View File

@ -70,11 +70,6 @@ void CopyPlane(const uint8* src_y,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Copy plane
for (y = 0; y < height; ++y) {
@ -116,11 +111,6 @@ void CopyPlane_16(const uint16* src_y,
CopyRow = CopyRow_16_NEON;
}
#endif
#if defined(HAS_COPYROW_16_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_16_MIPS;
}
#endif
// Copy plane
for (y = 0; y < height; ++y) {
@ -311,16 +301,6 @@ void SplitUVPlane(const uint8* src_uv,
}
}
#endif
#if defined(HAS_SPLITUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) &&
IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) &&
IS_ALIGNED(dst_stride_v, 4)) {
SplitUVRow = SplitUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
SplitUVRow = SplitUVRow_DSPR2;
}
}
#endif
#if defined(HAS_SPLITUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
SplitUVRow = SplitUVRow_Any_MSA;
@ -562,14 +542,6 @@ void MirrorPlane(const uint8* src_y,
}
}
#endif
// TODO(fbarchard): Mirror on mips handle unaligned memory.
#if defined(HAS_MIRRORROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) &&
IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) &&
IS_ALIGNED(dst_stride_y, 4)) {
MirrorRow = MirrorRow_DSPR2;
}
#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
@ -1473,15 +1445,6 @@ static int I422ToRGBAMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TORGBAROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
I422ToRGBARow = I422ToRGBARow_DSPR2;
}
#endif
#if defined(HAS_I422TORGBAROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToRGBARow = I422ToRGBARow_Any_MSA;
@ -2534,14 +2497,6 @@ int InterpolatePlane(const uint8* src0,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) &&
IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) &&
IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) &&
IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;

View File

@ -57,16 +57,6 @@ void TransposePlane(const uint8* src,
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) &&
IS_ALIGNED(src_stride, 4)) {
TransposeWx8 = TransposeWx8_Fast_DSPR2;
} else {
TransposeWx8 = TransposeWx8_DSPR2;
}
}
#endif
#if defined(HAS_TRANSPOSEWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
@ -168,14 +158,6 @@ void RotatePlane180(const uint8* src,
}
}
#endif
// TODO(fbarchard): Mirror on mips handle unaligned memory.
#if defined(HAS_MIRRORROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst, 4) &&
IS_ALIGNED(dst_stride, 4)) {
MirrorRow = MirrorRow_DSPR2;
}
#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
@ -204,11 +186,6 @@ void RotatePlane180(const uint8* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
@ -255,12 +232,6 @@ void TransposeUV(const uint8* src,
}
}
#endif
#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) &&
IS_ALIGNED(src_stride, 4)) {
TransposeUVWx8 = TransposeUVWx8_DSPR2;
}
#endif
#if defined(HAS_TRANSPOSEUVWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeUVWx16 = TransposeUVWx16_Any_MSA;
@ -355,12 +326,6 @@ void RotateUV180(const uint8* src,
MirrorUVRow = MirrorUVRow_SSSE3;
}
#endif
#if defined(HAS_MIRRORUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) &&
IS_ALIGNED(src_stride, 4)) {
MirrorUVRow = MirrorUVRow_DSPR2;
}
#endif
#if defined(HAS_MIRRORUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) {
MirrorUVRow = MirrorUVRow_MSA;

View File

@ -38,9 +38,6 @@ TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#ifdef HAS_TRANSPOSEWX8_DSPR2
TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
#endif
#ifdef HAS_TRANSPOSEWX16_MSA
TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
#endif
@ -64,9 +61,6 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_DSPR2
TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_MSA
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
#endif

View File

@ -173,11 +173,6 @@ void ARGBRotate180(const uint8* src,
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {

View File

@ -1,475 +0,0 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate_row.h"
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \
(__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
void TransposeWx8_DSPR2(const uint8* src,
int src_stride,
uint8* dst,
int dst_stride,
int width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"andi $t0, %[dst], 0x3 \n"
"andi $t1, %[dst_stride], 0x3 \n"
"or $t0, $t0, $t1 \n"
"bnez $t0, 11f \n"
" subu $t7, $t9, %[src_stride] \n"
// dst + dst_stride word aligned
"1: \n"
"lbu $t0, 0(%[src]) \n"
"lbux $t1, %[src_stride](%[src]) \n"
"lbux $t8, $t2(%[src]) \n"
"lbux $t9, $t3(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s0, $t8, $t0 \n"
"lbux $t0, $t4(%[src]) \n"
"lbux $t1, $t5(%[src]) \n"
"lbux $t8, $t6(%[src]) \n"
"lbux $t9, $t7(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s1, $t8, $t0 \n"
"sw $s0, 0(%[dst]) \n"
"addiu %[width], -1 \n"
"addiu %[src], 1 \n"
"sw $s1, 4(%[dst]) \n"
"bnez %[width], 1b \n"
" addu %[dst], %[dst], %[dst_stride] \n"
"b 2f \n"
// dst + dst_stride unaligned
"11: \n"
"lbu $t0, 0(%[src]) \n"
"lbux $t1, %[src_stride](%[src]) \n"
"lbux $t8, $t2(%[src]) \n"
"lbux $t9, $t3(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s0, $t8, $t0 \n"
"lbux $t0, $t4(%[src]) \n"
"lbux $t1, $t5(%[src]) \n"
"lbux $t8, $t6(%[src]) \n"
"lbux $t9, $t7(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s1, $t8, $t0 \n"
"swr $s0, 0(%[dst]) \n"
"swl $s0, 3(%[dst]) \n"
"addiu %[width], -1 \n"
"addiu %[src], 1 \n"
"swr $s1, 4(%[dst]) \n"
"swl $s1, 7(%[dst]) \n"
"bnez %[width], 11b \n"
"addu %[dst], %[dst], %[dst_stride] \n"
"2: \n"
".set pop \n"
: [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
: [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1");
}
void TransposeWx8_Fast_DSPR2(const uint8* src,
int src_stride,
uint8* dst,
int dst_stride,
int width) {
__asm__ __volatile__(
".set noat \n"
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"srl $AT, %[width], 0x2 \n"
"andi $t0, %[dst], 0x3 \n"
"andi $t1, %[dst_stride], 0x3 \n"
"or $t0, $t0, $t1 \n"
"bnez $t0, 11f \n"
" subu $t7, $t9, %[src_stride] \n"
// dst + dst_stride word aligned
"1: \n"
"lw $t0, 0(%[src]) \n"
"lwx $t1, %[src_stride](%[src]) \n"
"lwx $t8, $t2(%[src]) \n"
"lwx $t9, $t3(%[src]) \n"
// t0 = | 30 | 20 | 10 | 00 |
// t1 = | 31 | 21 | 11 | 01 |
// t8 = | 32 | 22 | 12 | 02 |
// t9 = | 33 | 23 | 13 | 03 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 21 | 01 | 20 | 00 |
// s1 = | 23 | 03 | 22 | 02 |
// s2 = | 31 | 11 | 30 | 10 |
// s3 = | 33 | 13 | 32 | 12 |
"precr.qb.ph $s4, $s1, $s0 \n"
"precrq.qb.ph $s5, $s1, $s0 \n"
"precr.qb.ph $s6, $s3, $s2 \n"
"precrq.qb.ph $s7, $s3, $s2 \n"
// s4 = | 03 | 02 | 01 | 00 |
// s5 = | 23 | 22 | 21 | 20 |
// s6 = | 13 | 12 | 11 | 10 |
// s7 = | 33 | 32 | 31 | 30 |
"lwx $t0, $t4(%[src]) \n"
"lwx $t1, $t5(%[src]) \n"
"lwx $t8, $t6(%[src]) \n"
"lwx $t9, $t7(%[src]) \n"
// t0 = | 34 | 24 | 14 | 04 |
// t1 = | 35 | 25 | 15 | 05 |
// t8 = | 36 | 26 | 16 | 06 |
// t9 = | 37 | 27 | 17 | 07 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 25 | 05 | 24 | 04 |
// s1 = | 27 | 07 | 26 | 06 |
// s2 = | 35 | 15 | 34 | 14 |
// s3 = | 37 | 17 | 36 | 16 |
"precr.qb.ph $t0, $s1, $s0 \n"
"precrq.qb.ph $t1, $s1, $s0 \n"
"precr.qb.ph $t8, $s3, $s2 \n"
"precrq.qb.ph $t9, $s3, $s2 \n"
// t0 = | 07 | 06 | 05 | 04 |
// t1 = | 27 | 26 | 25 | 24 |
// t8 = | 17 | 16 | 15 | 14 |
// t9 = | 37 | 36 | 35 | 34 |
"addu $s0, %[dst], %[dst_stride] \n"
"addu $s1, $s0, %[dst_stride] \n"
"addu $s2, $s1, %[dst_stride] \n"
"sw $s4, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $s6, 0($s0) \n"
"sw $t8, 4($s0) \n"
"sw $s5, 0($s1) \n"
"sw $t1, 4($s1) \n"
"sw $s7, 0($s2) \n"
"sw $t9, 4($s2) \n"
"addiu $AT, -1 \n"
"addiu %[src], 4 \n"
"bnez $AT, 1b \n"
" addu %[dst], $s2, %[dst_stride] \n"
"b 2f \n"
// dst + dst_stride unaligned
"11: \n"
"lw $t0, 0(%[src]) \n"
"lwx $t1, %[src_stride](%[src]) \n"
"lwx $t8, $t2(%[src]) \n"
"lwx $t9, $t3(%[src]) \n"
// t0 = | 30 | 20 | 10 | 00 |
// t1 = | 31 | 21 | 11 | 01 |
// t8 = | 32 | 22 | 12 | 02 |
// t9 = | 33 | 23 | 13 | 03 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 21 | 01 | 20 | 00 |
// s1 = | 23 | 03 | 22 | 02 |
// s2 = | 31 | 11 | 30 | 10 |
// s3 = | 33 | 13 | 32 | 12 |
"precr.qb.ph $s4, $s1, $s0 \n"
"precrq.qb.ph $s5, $s1, $s0 \n"
"precr.qb.ph $s6, $s3, $s2 \n"
"precrq.qb.ph $s7, $s3, $s2 \n"
// s4 = | 03 | 02 | 01 | 00 |
// s5 = | 23 | 22 | 21 | 20 |
// s6 = | 13 | 12 | 11 | 10 |
// s7 = | 33 | 32 | 31 | 30 |
"lwx $t0, $t4(%[src]) \n"
"lwx $t1, $t5(%[src]) \n"
"lwx $t8, $t6(%[src]) \n"
"lwx $t9, $t7(%[src]) \n"
// t0 = | 34 | 24 | 14 | 04 |
// t1 = | 35 | 25 | 15 | 05 |
// t8 = | 36 | 26 | 16 | 06 |
// t9 = | 37 | 27 | 17 | 07 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 25 | 05 | 24 | 04 |
// s1 = | 27 | 07 | 26 | 06 |
// s2 = | 35 | 15 | 34 | 14 |
// s3 = | 37 | 17 | 36 | 16 |
"precr.qb.ph $t0, $s1, $s0 \n"
"precrq.qb.ph $t1, $s1, $s0 \n"
"precr.qb.ph $t8, $s3, $s2 \n"
"precrq.qb.ph $t9, $s3, $s2 \n"
// t0 = | 07 | 06 | 05 | 04 |
// t1 = | 27 | 26 | 25 | 24 |
// t8 = | 17 | 16 | 15 | 14 |
// t9 = | 37 | 36 | 35 | 34 |
"addu $s0, %[dst], %[dst_stride] \n"
"addu $s1, $s0, %[dst_stride] \n"
"addu $s2, $s1, %[dst_stride] \n"
"swr $s4, 0(%[dst]) \n"
"swl $s4, 3(%[dst]) \n"
"swr $t0, 4(%[dst]) \n"
"swl $t0, 7(%[dst]) \n"
"swr $s6, 0($s0) \n"
"swl $s6, 3($s0) \n"
"swr $t8, 4($s0) \n"
"swl $t8, 7($s0) \n"
"swr $s5, 0($s1) \n"
"swl $s5, 3($s1) \n"
"swr $t1, 4($s1) \n"
"swl $t1, 7($s1) \n"
"swr $s7, 0($s2) \n"
"swl $s7, 3($s2) \n"
"swr $t9, 4($s2) \n"
"swl $t9, 7($s2) \n"
"addiu $AT, -1 \n"
"addiu %[src], 4 \n"
"bnez $AT, 11b \n"
" addu %[dst], $s2, %[dst_stride] \n"
"2: \n"
".set pop \n"
".set at \n"
: [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
: [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
"s2", "s3", "s4", "s5", "s6", "s7");
}
void TransposeUVWx8_DSPR2(const uint8* src,
int src_stride,
uint8* dst_a,
int dst_stride_a,
uint8* dst_b,
int dst_stride_b,
int width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"subu $t7, $t9, %[src_stride] \n"
"srl $t1, %[width], 1 \n"
// check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
"andi $t0, %[dst_a], 0x3 \n"
"andi $t8, %[dst_b], 0x3 \n"
"or $t0, $t0, $t8 \n"
"andi $t8, %[dst_stride_a], 0x3 \n"
"andi $s5, %[dst_stride_b], 0x3 \n"
"or $t8, $t8, $s5 \n"
"or $t0, $t0, $t8 \n"
"bnez $t0, 11f \n"
" nop \n"
// dst + dst_stride word aligned (both, a & b dst addresses)
"1: \n"
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
"sw $s3, 0($s5) \n"
"sw $s4, 0($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
"sw $s3, 0(%[dst_a]) \n"
"sw $s4, 0(%[dst_b]) \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
"sw $s3, 4($s5) \n"
"sw $s4, 4($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
"addiu %[src], 4 \n"
"addiu $t1, -1 \n"
"sll $t0, %[dst_stride_a], 1 \n"
"sll $t8, %[dst_stride_b], 1 \n"
"sw $s3, 4(%[dst_a]) \n"
"sw $s4, 4(%[dst_b]) \n"
"addu %[dst_a], %[dst_a], $t0 \n"
"bnez $t1, 1b \n"
" addu %[dst_b], %[dst_b], $t8 \n"
"b 2f \n"
" nop \n"
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
"11: \n"
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
"swr $s3, 0($s5) \n"
"swl $s3, 3($s5) \n"
"swr $s4, 0($s6) \n"
"swl $s4, 3($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
"swr $s3, 0(%[dst_a]) \n"
"swl $s3, 3(%[dst_a]) \n"
"swr $s4, 0(%[dst_b]) \n"
"swl $s4, 3(%[dst_b]) \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
"swr $s3, 4($s5) \n"
"swl $s3, 7($s5) \n"
"swr $s4, 4($s6) \n"
"swl $s4, 7($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
"addiu %[src], 4 \n"
"addiu $t1, -1 \n"
"sll $t0, %[dst_stride_a], 1 \n"
"sll $t8, %[dst_stride_b], 1 \n"
"swr $s3, 4(%[dst_a]) \n"
"swl $s3, 7(%[dst_a]) \n"
"swr $s4, 4(%[dst_b]) \n"
"swl $s4, 7(%[dst_b]) \n"
"addu %[dst_a], %[dst_a], $t0 \n"
"bnez $t1, 11b \n"
" addu %[dst_b], %[dst_b], $t8 \n"
"2: \n"
".set pop \n"
: [src] "+r"(src), [dst_a] "+r"(dst_a), [dst_b] "+r"(dst_b),
[width] "+r"(width), [src_stride] "+r"(src_stride)
: [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
"s2", "s3", "s4", "s5", "s6");
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -183,12 +183,6 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_DSPR2
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
@ -326,9 +320,6 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_DSPR2
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
@ -578,33 +569,6 @@ ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_DSPR2
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_DSPR2
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_DSPR2
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_BGRATOYROW_DSPR2
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_DSPR2
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_DSPR2
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_DSPR2
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
@ -851,9 +815,6 @@ ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_DSPR2
ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
@ -952,9 +913,6 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_DSPR2
ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_MSA
ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
#endif
@ -1116,18 +1074,6 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_DSPR2
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_DSPR2
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_DSPR2
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_DSPR2
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif

File diff suppressed because it is too large

View File

@ -103,13 +103,6 @@ static void ScalePlaneDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2;
}
#endif
#if defined(HAS_SCALEROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleRowDown2 =
@ -176,14 +169,6 @@ static void ScalePlaneDown2_16(int src_width,
: ScaleRowDown2Box_16_SSE2);
}
#endif
#if defined(HAS_SCALEROWDOWN2_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown2 =
filtering ? ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -247,13 +232,6 @@ static void ScalePlaneDown4(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2;
}
#endif
#if defined(HAS_SCALEROWDOWN4_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleRowDown4 =
@ -306,14 +284,6 @@ static void ScalePlaneDown4_16(int src_width,
filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
}
#endif
#if defined(HAS_SCALEROWDOWN4_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@ -411,19 +381,6 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_DSPR2;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2;
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@ -495,19 +452,6 @@ static void ScalePlaneDown34_16(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2;
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@ -612,19 +556,6 @@ static void ScalePlaneDown38(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_DSPR2;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2;
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
if (!filtering) {
@ -716,19 +647,6 @@ static void ScalePlaneDown38_16(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2;
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
@ -931,14 +849,6 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
#if defined(HAS_SCALEADDROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ScaleAddRow = ScaleAddRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_DSPR2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
@ -1070,14 +980,6 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -1193,14 +1095,6 @@ void ScalePlaneBilinearDown_16(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_16_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_16_DSPR2;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
@ -1281,14 +1175,6 @@ void ScalePlaneBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_C;
@ -1432,14 +1318,6 @@ void ScalePlaneBilinearUp_16(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
InterpolateRow = InterpolateRow_Any_16_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_16_DSPR2;
}
}
#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_16_C;
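
The hunks above all trim the same runtime-dispatch pattern: each ScalePlane* helper starts from the portable C row function, then upgrades to a SIMD variant when TestCpuFlag() reports the feature and the pointers, strides and width satisfy that kernel's requirements. A condensed, hedged sketch of the pattern follows (all names below are invented stand-ins, not libyuv's real symbols):

// dispatch_sketch.cc - hedged stand-in for the selection logic shown above.
#include <stddef.h>
#include <stdint.h>

static int HasSimdSketch(void) { return 0; }  // pretend CPU detection
static int IsAlignedSketch(const void* p, int a) {
  return ((uintptr_t)p & (a - 1)) == 0;
}

typedef void (*ScaleRowFn)(const uint8_t* src, ptrdiff_t stride,
                           uint8_t* dst, int dst_width);

// Portable default: keep the odd pixel of each pair (2x point downsample).
static void ScaleRowDown2_C_Sketch(const uint8_t* src, ptrdiff_t stride,
                                   uint8_t* dst, int dst_width) {
  (void)stride;
  for (int x = 0; x < dst_width; ++x) dst[x] = src[2 * x + 1];
}

// Stand-in fast path; a real SIMD kernel handles many pixels per step.
static void ScaleRowDown2_SIMD_Sketch(const uint8_t* src, ptrdiff_t stride,
                                      uint8_t* dst, int dst_width) {
  ScaleRowDown2_C_Sketch(src, stride, dst, dst_width);
}

// Pick the row function once per plane, then reuse it for every row.
ScaleRowFn PickScaleRowDown2(const uint8_t* src, const uint8_t* dst,
                             int dst_width) {
  ScaleRowFn fn = ScaleRowDown2_C_Sketch;  // safe default
  if (HasSimdSketch() && IsAlignedSketch(src, 4) && IsAlignedSketch(dst, 4) &&
      (dst_width % 16 == 0)) {
    fn = ScaleRowDown2_SIMD_Sketch;        // fast path only when safe
  }
  return fn;
}

After this commit the pattern itself is unchanged; only the branches guarded by kCpuHasDSPR2 disappear.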

View File

@ -456,9 +456,6 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_DSPR2
SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
#endif
#undef SAANY
#ifdef __cplusplus

View File

@ -306,15 +306,6 @@ static void ScaleARGBBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -419,12 +410,6 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -587,15 +572,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_DSPR2;
}
#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
@ -632,12 +608,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
IS_ALIGNED(dst_stride_argb, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;

View File

@ -1063,16 +1063,6 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(dst_width_bytes, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
InterpolateRow = InterpolateRow_Any_MSA;
@ -1150,16 +1140,6 @@ void ScalePlaneVertical_16(int src_height,
InterpolateRow = InterpolateRow_16_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_16_DSPR2;
if (IS_ALIGNED(dst_width_bytes, 4)) {
InterpolateRow = InterpolateRow_16_DSPR2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int yi;

View File

@ -1,668 +0,0 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC MIPS DSPR2
#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \
(__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
void ScaleRowDown2_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
"beqz $t9, 2f \n"
" nop \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// TODO(fbarchard): Use odd pixels instead of even.
"precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
"precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
"precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
"precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t8, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $t1, 8(%[dst]) \n"
"sw $t2, 12(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 16 \n"
"2: \n"
"andi $t9, %[dst_width], 0xf \n" // residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
"lbu $t0, 1(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 2 \n"
"addiu $t9, $t9, -1 \n"
"sb $t0, 0(%[dst]) \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 1 \n"
"3: \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
: [dst_width] "r"(dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width) {
const uint8* t = src_ptr + src_stride;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 3 \n" // iterations -> step 8
"bltz $t9, 2f \n"
" nop \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 0(%[t]) \n" // |19|18|17|16|
"lw $t5, 4(%[t]) \n" // |23|22|21|20|
"lw $t6, 8(%[t]) \n" // |27|26|25|24|
"lw $t7, 12(%[t]) \n" // |31|30|29|28|
"addiu $t9, $t9, -1 \n"
"srl $t8, $t0, 16 \n" // |X|X|3|2|
"ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
"ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
"raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
"raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
"shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
"shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
"srl $t8, $t1, 16 \n" // |X|X|7|6|
"ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
"ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
"raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
"raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
"shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
"shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
"srl $t8, $t2, 16 \n" // |X|X|11|10|
"ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
"ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
"raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
"raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
"shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
"shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
"srl $t8, $t3, 16 \n" // |X|X|15|14|
"ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
"ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
"raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
"raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
"shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
"shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
"addiu %[src_ptr], %[src_ptr], 16 \n"
"addiu %[t], %[t], 16 \n"
"sb $t0, 0(%[dst]) \n"
"sb $t4, 1(%[dst]) \n"
"sb $t1, 2(%[dst]) \n"
"sb $t5, 3(%[dst]) \n"
"sb $t2, 4(%[dst]) \n"
"sb $t6, 5(%[dst]) \n"
"sb $t3, 6(%[dst]) \n"
"sb $t7, 7(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 8 \n"
"2: \n"
"andi $t9, %[dst_width], 0x7 \n" // x = residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
"lwr $t1, 0(%[src_ptr]) \n"
"lwl $t1, 3(%[src_ptr]) \n"
"lwr $t2, 0(%[t]) \n"
"lwl $t2, 3(%[t]) \n"
"srl $t8, $t1, 16 \n"
"ins $t1, $t2, 16, 16 \n"
"ins $t2, $t8, 0, 16 \n"
"raddu.w.qb $t1, $t1 \n"
"raddu.w.qb $t2, $t2 \n"
"shra_r.w $t1, $t1, 2 \n"
"shra_r.w $t2, $t2, 2 \n"
"sb $t1, 0(%[dst]) \n"
"sb $t2, 1(%[dst]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -2 \n"
"addiu %[t], %[t], 4 \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 2 \n"
"3: \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [t] "+r"(t)
: [dst_width] "r"(dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void ScaleRowDown4_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 3 \n"
"beqz $t9, 2f \n"
" nop \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
"precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
"precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
"precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t1, 0(%[dst]) \n"
"sw $t5, 4(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 8 \n"
"2: \n"
"andi $t9, %[dst_width], 7 \n" // residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
"lbu $t1, 2(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -1 \n"
"sb $t1, 0(%[dst]) \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 1 \n"
"3: \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
: [dst_width] "r"(dst_width)
: "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width) {
intptr_t stride = src_stride;
const uint8* s1 = src_ptr + stride;
const uint8* s2 = s1 + stride;
const uint8* s3 = s2 + stride;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 1 \n"
"andi $t8, %[dst_width], 1 \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
"lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 4(%[s1]) \n" // |23|22|21|20|
"lw $t6, 4(%[s2]) \n" // |27|26|25|24|
"lw $t7, 4(%[s3]) \n" // |31|30|29|28|
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
"raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
"raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
"raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
"raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
"add $t0, $t0, $t1 \n"
"add $t1, $t2, $t3 \n"
"add $t0, $t0, $t1 \n"
"add $t4, $t4, $t5 \n"
"add $t6, $t6, $t7 \n"
"add $t4, $t4, $t6 \n"
"shra_r.w $t0, $t0, 4 \n"
"shra_r.w $t4, $t4, 4 \n"
"sb $t0, 0(%[dst]) \n"
"sb $t4, 1(%[dst]) \n"
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[s1], %[s1], 8 \n"
"addiu %[s2], %[s2], 8 \n"
"addiu %[s3], %[s3], 8 \n"
"addiu $t9, $t9, -1 \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 2 \n"
"beqz $t8, 2f \n"
" nop \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
"add $t0, $t0, $t1 \n"
"add $t1, $t2, $t3 \n"
"add $t0, $t0, $t1 \n"
"shra_r.w $t0, $t0, 4 \n"
"sb $t0, 0(%[dst]) \n"
"2: \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [s1] "+r"(s1), [s2] "+r"(s2),
[s3] "+r"(s3)
: [dst_width] "r"(dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void ScaleRowDown34_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
"precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
"precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30|
"addiu %[dst_width], %[dst_width], -24 \n"
"ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
"ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
"ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
"ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
"packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
"prepend $t1, $t2, 8 \n" // |4|3|1|0|
"prepend $t3, $t4, 24 \n" // |15|13|12|11|
"prepend $t5, $t6, 8 \n" // |20|19|17|16|
"prepend $t7, $t8, 24 \n" // |31|29|28|27|
"sw $t1, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $t3, 8(%[dst]) \n"
"sw $t5, 12(%[dst]) \n"
"sw $t9, 16(%[dst]) \n"
"sw $t7, 20(%[dst]) \n"
"bnez %[dst_width], 1b \n"
" addiu %[dst], %[dst], 24 \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width)
:
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* d,
int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
"rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
"muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
"muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
"andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
"raddu.w.qb $t0, $t0 \n"
"raddu.w.qb $t1, $t1 \n"
"shra_r.w $t0, $t0, 1 \n"
"shra_r.w $t1, $t1, 1 \n"
"preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
"rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
"addu.ph $t2, $t2, $t4 \n"
"addu.ph $t6, $t6, $t5 \n"
"sll $t5, $t0, 1 \n"
"add $t0, $t5, $t0 \n"
"shra_r.ph $t2, $t2, 2 \n"
"shra_r.ph $t6, $t6, 2 \n"
"shll.ph $t4, $t2, 1 \n"
"addq.ph $t4, $t4, $t2 \n"
"addu $t0, $t0, $t1 \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"shra_r.w $t0, $t0, 2 \n"
"addu.ph $t6, $t6, $t4 \n"
"shra_r.ph $t6, $t6, 2 \n"
"srl $t1, $t6, 16 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"sb $t1, 0(%[d]) \n"
"sb $t0, 1(%[d]) \n"
"sb $t6, 2(%[d]) \n"
"bgtz %[dst_width], 1b \n"
" addiu %[d], %[d], 3 \n"
"3: \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d),
[dst_width] "+r"(dst_width)
:
: "t0", "t1", "t2", "t3", "t4", "t5", "t6");
}
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* d,
int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
"rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
"muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
"muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
"andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
"raddu.w.qb $t0, $t0 \n"
"raddu.w.qb $t1, $t1 \n"
"shra_r.w $t0, $t0, 1 \n"
"shra_r.w $t1, $t1, 1 \n"
"preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
"rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
"addu.ph $t4, $t4, $t3 \n"
"addu.ph $t6, $t6, $t5 \n"
"shra_r.ph $t6, $t6, 2 \n"
"shra_r.ph $t4, $t4, 2 \n"
"addu.ph $t6, $t6, $t4 \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"shra_r.ph $t6, $t6, 1 \n"
"addu $t0, $t0, $t1 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"shra_r.w $t0, $t0, 1 \n"
"srl $t1, $t6, 16 \n"
"sb $t1, 0(%[d]) \n"
"sb $t0, 1(%[d]) \n"
"sb $t6, 2(%[d]) \n"
"bgtz %[dst_width], 1b \n"
" addiu %[d], %[d], 3 \n"
"3: \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d),
[dst_width] "+r"(dst_width)
:
: "t0", "t1", "t2", "t3", "t4", "t5", "t6");
}
void ScaleRowDown38_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst,
int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
"wsbh $t0, $t0 \n" // |2|3|0|1|
"wsbh $t6, $t6 \n" // |26|27|24|25|
"srl $t0, $t0, 8 \n" // |X|2|3|0|
"srl $t3, $t3, 16 \n" // |X|X|15|14|
"srl $t5, $t5, 16 \n" // |X|X|23|22|
"srl $t7, $t7, 16 \n" // |X|X|31|30|
"ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
"ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
"ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
"ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
"prepend $t2, $t3, 24 \n" // |X|15|14|11|
"ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
"ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu %[dst_width], %[dst_width], -12 \n"
"addiu $t8,%[dst_width], -12 \n"
"sw $t1, 0(%[dst]) \n"
"sw $t4, 4(%[dst]) \n"
"sw $t6, 8(%[dst]) \n"
"bgez $t8, 1b \n"
" addiu %[dst], %[dst], 12 \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width)
:
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
}
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
const uint8* t = src_ptr + stride;
const int c = 0x2AAA;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
"lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
"lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
"packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
"packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
"raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
"raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
"precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
"precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
"srl $t4, $t4, 2 \n" // t4 / 4
"srl $t6, $t6, 16 \n" // |0|0|S3|T3|
"raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
"addu $t6, $t5, $t6 \n"
"mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
"raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
"raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
"addu $t0, $t0, $t2 \n"
"mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[t], %[t], 8 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
"srl $t6, $t6, 16 \n"
"srl $t0, $t0, 16 \n"
"sb $t4, -1(%[dst_ptr]) \n"
"sb $t6, -2(%[dst_ptr]) \n"
"bgtz %[dst_width], 1b \n"
" sb $t0, -3(%[dst_ptr]) \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [t] "+r"(t),
[dst_width] "+r"(dst_width)
: [c] "r"(c)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6");
}
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width) {
intptr_t stride = src_stride;
const uint8* s1 = src_ptr + stride;
stride += stride;
const uint8* s2 = src_ptr + stride;
const int c1 = 0x1C71;
const int c2 = 0x2AAA;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
"lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
"lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
"lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
"lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
"packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
"raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
"packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
"raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
"sll $t8, $t5, 16 \n" // |R5|R4|0|0|
"raddu.w.qb $t8, $t8 \n" // R5+R4
"addu $t7, $t7, $t8 \n"
"srl $t8, $t5, 16 \n" // |0|0|R7|R6|
"raddu.w.qb $t8, $t8 \n" // R7 + R6
"addu $t6, $t6, $t8 \n"
"mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
"precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
"precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
"srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
"raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
"addu $t7, $t7, $t8 \n"
"mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
"sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
"raddu.w.qb $t0, $t0 \n"
"raddu.w.qb $t2, $t2 \n"
"raddu.w.qb $t4, $t4 \n"
"addu $t0, $t0, $t2 \n"
"addu $t0, $t0, $t4 \n"
"mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[s1], %[s1], 8 \n"
"addiu %[s2], %[s2], 8 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
"srl $t6, $t6, 16 \n"
"srl $t7, $t7, 16 \n"
"srl $t0, $t0, 16 \n"
"sb $t6, -1(%[dst_ptr]) \n"
"sb $t7, -2(%[dst_ptr]) \n"
"bgtz %[dst_width], 1b \n"
" sb $t0, -3(%[dst_ptr]) \n"
".set pop \n"
: [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1),
[s2] "+r"(s2), [dst_width] "+r"(dst_width)
: [c1] "r"(c1), [c2] "r"(c2)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
}
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
for (x = 0; x < ((src_width - 1)); x += 8) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t5], 0(%[src_ptr]) \n"
"lw %[tmp_t6], 4(%[src_ptr]) \n"
"lw %[tmp_t1], 0(%[dst_ptr]) \n"
"lw %[tmp_t2], 4(%[dst_ptr]) \n"
"lw %[tmp_t3], 8(%[dst_ptr]) \n"
"lw %[tmp_t4], 12(%[dst_ptr]) \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n"
"addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n"
"addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n"
"sw %[tmp_t1], 0(%[dst_ptr]) \n"
"sw %[tmp_t2], 4(%[dst_ptr]) \n"
"sw %[tmp_t3], 8(%[dst_ptr]) \n"
"sw %[tmp_t4], 12(%[dst_ptr]) \n"
".set pop \n"
:
[tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3),
[tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr)
: [dst_ptr] "r"(dst_ptr));
src_ptr += 8;
dst_ptr += 8;
}
if ((src_width)&7) {
for (x = 0; x < ((src_width - 1) & 7); x += 1) {
dst_ptr[0] += src_ptr[0];
src_ptr += 1;
dst_ptr += 1;
}
}
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
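
Functionally, the deleted assembly above implements the same row operations as the portable kernels that remain: ScaleRowDown2 keeps every second pixel, and ScaleRowDown2Box averages each 2x2 block with rounding, which is what the raddu.w.qb / shra_r.w ..., 2 pairs compute ((sum + 2) >> 2). A hedged C++ sketch of that behaviour (not libyuv's actual C code; names are invented):

// scale_down2_sketch.cc - portable equivalent of the two simplest kernels
// the deleted file provided.
#include <stddef.h>
#include <stdint.h>

// 2x point downsample: keep the odd pixel of each horizontal pair.
void ScaleRowDown2_Sketch(const uint8_t* src, ptrdiff_t stride,
                          uint8_t* dst, int dst_width) {
  (void)stride;  // single-row kernel; the stride argument is unused
  for (int x = 0; x < dst_width; ++x) dst[x] = src[2 * x + 1];
}

// 2x box downsample: rounded average of a 2x2 block, matching the
// (sum + 2) >> 2 performed by shra_r.w above.
void ScaleRowDown2Box_Sketch(const uint8_t* src, ptrdiff_t src_stride,
                             uint8_t* dst, int dst_width) {
  const uint8_t* next = src + src_stride;  // second source row
  for (int x = 0; x < dst_width; ++x) {
    int sum = src[2 * x] + src[2 * x + 1] + next[2 * x] + next[2 * x + 1];
    dst[x] = (uint8_t)((sum + 2) >> 2);
  }
}

On MIPS builds these widths now fall back to the C kernels, or to the MSA kernels where available.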

View File

@ -65,8 +65,6 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
#if defined(__mips__)
int has_mips = TestCpuFlag(kCpuHasMIPS);
printf("Has MIPS %x\n", has_mips);
int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
printf("Has DSPR2 %x\n", has_dspr2);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa);
#endif
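
With kCpuHasDSPR2 gone, MSA (kCpuHasMSA) is the only MIPS SIMD feature the library still detects, so code that previously keyed fast paths off the DSPR2 flag should key them off the MSA flag instead. A minimal usage sketch (assuming the public cpu_id.h header, which declares TestCpuFlag and kCpuHasMSA):

#include <stdio.h>
#include "libyuv/cpu_id.h"

int main(void) {
  if (libyuv::TestCpuFlag(libyuv::kCpuHasMSA)) {
    printf("Has MSA: MSA row functions will be used\n");
  } else {
    printf("No MSA: portable C row functions will be used\n");
  }
  return 0;
}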

View File

@ -69,8 +69,8 @@ int main(int argc, const char* argv[]) {
printf("Has NEON %x\n", has_neon);
}
if (has_mips) {
int has_dspr2 = TestCpuFlag(kCpuHasDSPR2);
printf("Has DSPR2 %x\n", has_dspr2);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa);
}
if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2);