diff --git a/source/row_common.cc b/source/row_common.cc index 67b0c7233..2d0f27d42 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -26,11 +26,6 @@ extern "C" { (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__))) #define LIBYUV_RGB7 1 #endif -// mips use 7 bit RGBToY -#if (!defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)) || \ - (!defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)) -#define LIBYUV_RGB7 1 -#endif // llvm x86 is poor at ternary operator, so use branchless min/max. diff --git a/source/row_mmi.cc b/source/row_mmi.cc index 9ecafaa23..759c0b529 100644 --- a/source/row_mmi.cc +++ b/source/row_mmi.cc @@ -2473,10 +2473,10 @@ void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) { uint64_t src, src_hi, src_lo; uint64_t dest, dest0, dest1, dest2, dest3; uint64_t tmp0, tmp1; - const uint64_t shift = 0x07; - const uint64_t value = 0x0040; + const uint64_t shift = 0x08; + const uint64_t value = 0x80; const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x00010026004B000FULL; + const uint64_t mask1 = 0x0001004D0096001DULL; __asm__ volatile( "1: \n\t" @@ -4054,10 +4054,10 @@ void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) { uint64_t tmp0, tmp1; const uint64_t mask0 = 0x0; const uint64_t mask1 = 0x01; - const uint64_t mask2 = 0x00400026004B000FULL; + const uint64_t mask2 = 0x0080004D0096001DULL; const uint64_t mask3 = 0xFF000000FF000000ULL; const uint64_t mask4 = ~mask3; - const uint64_t shift = 0x07; + const uint64_t shift = 0x08; __asm__ volatile( "1: \n\t" diff --git a/source/row_msa.cc b/source/row_msa.cc index 1e410fb6b..199e5db3c 100644 --- a/source/row_msa.cc +++ b/source/row_msa.cc @@ -1412,17 +1412,17 @@ void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, vec1, dst0, dst1; v8u16 reg0; - v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26); - v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); + v16u8 const_0x4D = (v16u8)__msa_ldi_h(0x4D); + v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D); for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); - reg0 = __msa_dotp_u_h(vec0, const_0x4B0F); - reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26); - reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7); + reg0 = __msa_dotp_u_h(vec0, const_0x961D); + reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x4D); + reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 8); vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0); vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0); dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0); @@ -2419,16 +2419,16 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx, void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; - v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); - v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26); - v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40); + v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D); + v16u8 const_0x4D = (v16u8)__msa_fill_h(0x4D); + v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); - ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7, + ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8, dst0); ST_UB(dst0, dst_y); src_argb0 += 64;