diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h index 641fbb263..3e13e8e77 100644 --- a/include/libyuv/macros_msa.h +++ b/include/libyuv/macros_msa.h @@ -74,5 +74,20 @@ out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ } #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) + +#define VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ + out0 = (RTYPE) __msa_vshf_w((v4i32) mask0, (v4i32) in1, (v4i32) in0); \ + out1 = (RTYPE) __msa_vshf_w((v4i32) mask1, (v4i32) in3, (v4i32) in2); \ +} +#define VSHF_W2_UB(...) VSHF_W2(v16u8, __VA_ARGS__) +#define VSHF_W2_SB(...) VSHF_W2(v16i8, __VA_ARGS__) +#define VSHF_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, \ + out0, out1, out2, out3) { \ + VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ + VSHF_W2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3) \ +} +#define VSHF_W4_UB(...) VSHF_W4(v16u8, __VA_ARGS__) + #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ #endif /* __MACROS_MSA_H__ */ diff --git a/include/libyuv/row.h b/include/libyuv/row.h index d1ba8919d..28090fa9b 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -374,6 +374,7 @@ extern "C" { #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_MIRRORROW_MSA +#define HAS_ARGBMIRRORROW_MSA #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -832,10 +833,12 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); +void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); +void ARGBMirrorRow_Any_MSA(const uint8* src, uint8* dst, int width); void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 71f39b3bb..b919e9615 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -659,6 +659,14 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBMIRRORROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBMirrorRow = ARGBMirrorRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBMirrorRow = ARGBMirrorRow_MSA; + } + } +#endif // Mirror plane for (y = 0; y < height; ++y) { diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 787c0ad1b..71d29d410 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -118,6 +118,14 @@ void ARGBRotate180(const uint8* src, int src_stride, } } #endif +#if defined(HAS_ARGBMIRRORROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBMirrorRow = ARGBMirrorRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBMirrorRow = ARGBMirrorRow_MSA; + } + } +#endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; diff --git a/source/row_any.cc b/source/row_any.cc index 14a59718a..28b6758fc 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -643,6 +643,9 @@ ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3) #ifdef HAS_ARGBMIRRORROW_NEON ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3) #endif +#ifdef HAS_ARGBMIRRORROW_MSA +ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) +#endif #undef ANY11M // Any 1 plane. (memset) diff --git a/source/row_msa.cc b/source/row_msa.cc index 6dd6f5f3b..b86865cf3 100644 --- a/source/row_msa.cc +++ b/source/row_msa.cc @@ -10,36 +10,52 @@ #include "libyuv/row.h" +// This module is for GCC MSA #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #include "libyuv/macros_msa.h" -#endif #ifdef __cplusplus namespace libyuv { extern "C" { #endif -#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { - int count; + int x; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; - v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; - + v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; src += width - 64; - for (count = 0; count < width; count += 64) { + for (x = 0; x < width; x += 64) { LD_UB4(src, 16, src3, src2, src1, src0); - VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); - VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); + VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); + VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); + ST_UB4(dst0, dst1, dst2, dst3, dst, 16); + dst += 64; + src -= 64; + } +} + +void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) { + int x; + v16u8 src0, src1, src2, src3; + v16u8 dst0, dst1, dst2, dst3; + v16i8 shuffler = { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 }; + src += width * 4 - 64; + + for (x = 0; x < width; x += 16) { + LD_UB4(src, 16, src3, src2, src1, src0); + VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); + VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); ST_UB4(dst0, dst1, dst2, dst3, dst, 16); dst += 64; src -= 64; } } -#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif + +#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)