mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Use 8 bit RGB to Y coefficients for Y and YJ in MMI and MSA.
1. Switch to 8-bit precision. 2. Fix an error in the MMI and MSA implementations. About the error: the MMI and MSA implementations of RGBToY and RGBToYJ used a precision different from that of the C implementation (the C version was unified in commit fce0fed542001577e6b10f4cf859e0fa1774974e). This patch unifies the precision to 8 bits for RGBToYJ in MMI and MSA. Change-Id: Ic6a6e424d27a2f049b0c954f03174192d2beb091 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2155608 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
2f48ffd42b
commit
1cd417bda9
@ -26,11 +26,6 @@ extern "C" {
|
||||
(defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
|
||||
#define LIBYUV_RGB7 1
|
||||
#endif
|
||||
// mips use 7 bit RGBToY
|
||||
#if (!defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)) || \
|
||||
(!defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa))
|
||||
#define LIBYUV_RGB7 1
|
||||
#endif
|
||||
|
||||
// llvm x86 is poor at ternary operator, so use branchless min/max.
|
||||
|
||||
|
||||
@ -2473,10 +2473,10 @@ void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
||||
uint64_t src, src_hi, src_lo;
|
||||
uint64_t dest, dest0, dest1, dest2, dest3;
|
||||
uint64_t tmp0, tmp1;
|
||||
const uint64_t shift = 0x07;
|
||||
const uint64_t value = 0x0040;
|
||||
const uint64_t shift = 0x08;
|
||||
const uint64_t value = 0x80;
|
||||
const uint64_t mask0 = 0x0;
|
||||
const uint64_t mask1 = 0x00010026004B000FULL;
|
||||
const uint64_t mask1 = 0x0001004D0096001DULL;
|
||||
|
||||
__asm__ volatile(
|
||||
"1: \n\t"
|
||||
@ -4054,10 +4054,10 @@ void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
||||
uint64_t tmp0, tmp1;
|
||||
const uint64_t mask0 = 0x0;
|
||||
const uint64_t mask1 = 0x01;
|
||||
const uint64_t mask2 = 0x00400026004B000FULL;
|
||||
const uint64_t mask2 = 0x0080004D0096001DULL;
|
||||
const uint64_t mask3 = 0xFF000000FF000000ULL;
|
||||
const uint64_t mask4 = ~mask3;
|
||||
const uint64_t shift = 0x07;
|
||||
const uint64_t shift = 0x08;
|
||||
|
||||
__asm__ volatile(
|
||||
"1: \n\t"
|
||||
|
||||
@ -1412,17 +1412,17 @@ void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, vec0, vec1, dst0, dst1;
|
||||
v8u16 reg0;
|
||||
v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26);
|
||||
v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
|
||||
v16u8 const_0x4D = (v16u8)__msa_ldi_h(0x4D);
|
||||
v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
|
||||
vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
|
||||
vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
|
||||
reg0 = __msa_dotp_u_h(vec0, const_0x4B0F);
|
||||
reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26);
|
||||
reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7);
|
||||
reg0 = __msa_dotp_u_h(vec0, const_0x961D);
|
||||
reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x4D);
|
||||
reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 8);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0);
|
||||
vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0);
|
||||
dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0);
|
||||
@ -2419,16 +2419,16 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx,
|
||||
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, dst0;
|
||||
v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
|
||||
v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26);
|
||||
v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40);
|
||||
v16u8 const_0x961D = (v16u8)__msa_fill_h(0x961D);
|
||||
v16u8 const_0x4D = (v16u8)__msa_fill_h(0x4D);
|
||||
v8u16 const_0x80 = (v8u16)__msa_fill_h(0x80);
|
||||
|
||||
for (x = 0; x < width; x += 16) {
|
||||
src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0);
|
||||
src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16);
|
||||
src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32);
|
||||
src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48);
|
||||
ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7,
|
||||
ARGBTOY(src0, src1, src2, src3, const_0x961D, const_0x4D, const_0x80, 8,
|
||||
dst0);
|
||||
ST_UB(dst0, dst_y);
|
||||
src_argb0 += 64;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user