diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h index 29997ce11..4e232b66b 100644 --- a/include/libyuv/macros_msa.h +++ b/include/libyuv/macros_msa.h @@ -140,6 +140,9 @@ #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ #define LD_UB(...) LD_B(const v16u8, __VA_ARGS__) +#define LD_H(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ +#define LD_UH(...) LD_H(const v8u16, __VA_ARGS__) /* LD_H with RTYPE fixed to const v8u16: dereferences psrc as one v8u16 vector. */ + #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 1dc6e9b56..18b0bfd7e 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -485,6 +485,7 @@ extern "C" { #define HAS_J400TOARGBROW_MSA #define HAS_MERGEUVROW_MSA #define HAS_MIRRORROW_MSA +#define HAS_MIRRORUVROW_MSA #define HAS_MIRRORSPLITUVROW_MSA #define HAS_NV12TOARGBROW_MSA #define HAS_NV12TORGB565ROW_MSA @@ -1571,10 +1572,12 @@ void MirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorSplitUVRow_SSSE3(const uint8_t* src, uint8_t* dst_u, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 730fe7ca8..d5cd7e680 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1099,6 +1099,14 @@ void MirrorUVPlane(const uint8_t* src_uv, } } 
#endif +#if defined(HAS_MIRRORUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MirrorUVRow = MirrorUVRow_Any_MSA; /* Start with the Any wrapper; it is replaced by the direct row function below only when IS_ALIGNED(width, 8) holds. */ + if (IS_ALIGNED(width, 8)) { + MirrorUVRow = MirrorUVRow_MSA; + } + } +#endif // MirrorUV plane for (y = 0; y < height; ++y) { diff --git a/source/row_any.cc b/source/row_any.cc index 57469c8f3..61cc7cd87 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -1223,6 +1223,9 @@ ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7) #ifdef HAS_MIRRORUVROW_NEON ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31) #endif +#ifdef HAS_MIRRORUVROW_MSA +ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7) /* NOTE(review): 2 and 7 presumably mean 2 bytes per UV pair and a width mask of 7, matching the 8-pair step of MirrorUVRow_MSA; confirm against the ANY11M definition. */ +#endif #ifdef HAS_ARGBMIRRORROW_AVX2 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) #endif diff --git a/source/row_msa.cc b/source/row_msa.cc index aebe4b410..7ca34daa7 100644 --- a/source/row_msa.cc +++ b/source/row_msa.cc @@ -302,6 +302,20 @@ void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { } } +void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width) { /* Copies src_uv to dst_uv in blocks of 8 halfwords (16 bytes), reading blocks from the end of the source towards its start and writing them forwards. NOTE(review): appears to require width to be a positive multiple of 8; the MirrorUVPlane dispatch in this patch selects it only under IS_ALIGNED(width, 8). */ + int x; + v8u16 src, dst; + v8u16 shuffler = {7, 6, 5, 4, 3, 2, 1, 0}; /* Halfword indices 7..0; presumably makes the shuffle emit the 8 halfwords in reverse order, keeping the two bytes of each UV pair together. Confirm against the __msa_vshf_h documentation. */ + src_uv += (width - 8) << 1; /* Advance to the last block of 8 pairs: (width - 8) pairs at 2 bytes each. */ + for (x = 0; x < width; x += 8) { + src = LD_UH(src_uv); + dst = __msa_vshf_h(shuffler, src, src); + ST_UH(dst, dst_uv); /* NOTE(review): this patch adds LD_H/LD_UH only; ST_UH is presumably already defined in macros_msa.h. Verify. */ + src_uv -= 16; /* Source walks backwards by one 16-byte block. */ + dst_uv += 16; /* Destination walks forwards by one 16-byte block. */ + } +} + void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { int x; v16u8 src0, src1, src2, src3;