mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-06 01:39:49 +08:00
ARGBToJ420 MMI and MSA version match C.
In commit 6cd1ff, the C version was updated. This patch updates the MMI and MSA versions to match the C version. Change-Id: Iea811e232f9c6019a80364d165f0255a37ce41b4 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2227755 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
6d603ec3f5
commit
5c6cdd0747
@ -2632,8 +2632,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x002b0054007f0002;
|
||||
const uint64_t mask_v = 0x0002007f006b0014;
|
||||
const uint64_t mask_u = 0x0015002a003f0002;
|
||||
const uint64_t mask_v = 0x0002003f0035000a;
|
||||
|
||||
__asm__ volatile(
|
||||
"1: \n\t"
|
||||
@ -2646,8 +2646,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[dest0_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
|
||||
@ -2663,8 +2663,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
@ -2689,8 +2689,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[dest1_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
|
||||
@ -2706,8 +2706,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
@ -2732,8 +2732,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[dest2_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
|
||||
@ -2749,8 +2749,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
@ -2775,8 +2775,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[dest3_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
|
||||
@ -2792,8 +2792,8 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
"punpckhbh %[src_hi], %[src0], %[zero] \n\t"
|
||||
"punpcklbh %[src0], %[src1], %[zero] \n\t"
|
||||
"punpckhbh %[src1], %[src1], %[zero] \n\t"
|
||||
"pavgh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"pavgh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"paddh %[src0], %[src_lo], %[src0] \n\t"
|
||||
"paddh %[src1], %[src_hi], %[src1] \n\t"
|
||||
"pavgh %[src0], %[src0], %[src1] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
|
||||
@ -205,33 +205,37 @@ extern "C" {
|
||||
}
|
||||
|
||||
#define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \
|
||||
shf0, shf1, shf2, shf3, v_out, u_out) \
|
||||
shf0, shf1, shf2, shf3, shift, u_out, v_out) \
|
||||
{ \
|
||||
v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
|
||||
v8u16 reg0_m, reg1_m, reg2_m, reg3_m; \
|
||||
v8u16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
|
||||
v4u32 reg0_m, reg1_m, reg2_m, reg3_m; \
|
||||
\
|
||||
vec0_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec1_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec2_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec3_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec4_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec5_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec6_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec7_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb3, (v16i8)argb2); \
|
||||
reg0_m = __msa_dotp_u_h(vec0_m, const1); \
|
||||
reg1_m = __msa_dotp_u_h(vec1_m, const1); \
|
||||
reg2_m = __msa_dotp_u_h(vec4_m, const1); \
|
||||
reg3_m = __msa_dotp_u_h(vec5_m, const1); \
|
||||
reg0_m += const3; \
|
||||
reg1_m += const3; \
|
||||
reg2_m += const3; \
|
||||
reg3_m += const3; \
|
||||
reg0_m -= __msa_dotp_u_h(vec2_m, const0); \
|
||||
reg1_m -= __msa_dotp_u_h(vec3_m, const0); \
|
||||
reg2_m -= __msa_dotp_u_h(vec6_m, const2); \
|
||||
reg3_m -= __msa_dotp_u_h(vec7_m, const2); \
|
||||
v_out = (v16u8)__msa_pckod_b((v16i8)reg1_m, (v16i8)reg0_m); \
|
||||
u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \
|
||||
vec0_m = (v8u16)__msa_vshf_h(shf0, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec1_m = (v8u16)__msa_vshf_h(shf0, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec2_m = (v8u16)__msa_vshf_h(shf1, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec3_m = (v8u16)__msa_vshf_h(shf1, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec4_m = (v8u16)__msa_vshf_h(shf2, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec5_m = (v8u16)__msa_vshf_h(shf2, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec6_m = (v8u16)__msa_vshf_h(shf3, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec7_m = (v8u16)__msa_vshf_h(shf3, (v16i8)argb3, (v16i8)argb2); \
|
||||
reg0_m = __msa_dotp_u_w(vec0_m, const0); \
|
||||
reg1_m = __msa_dotp_u_w(vec1_m, const0); \
|
||||
reg2_m = __msa_dotp_u_w(vec4_m, const0); \
|
||||
reg3_m = __msa_dotp_u_w(vec5_m, const0); \
|
||||
reg0_m += const1; \
|
||||
reg1_m += const1; \
|
||||
reg2_m += const1; \
|
||||
reg3_m += const1; \
|
||||
reg0_m -= (v4u32)__msa_dotp_u_w(vec2_m, const2); \
|
||||
reg1_m -= (v4u32)__msa_dotp_u_w(vec3_m, const2); \
|
||||
reg2_m -= (v4u32)__msa_dotp_u_w(vec6_m, const3); \
|
||||
reg3_m -= (v4u32)__msa_dotp_u_w(vec7_m, const3); \
|
||||
reg0_m = __msa_srl_w(reg0_m, shift); \
|
||||
reg1_m = __msa_srl_w(reg1_m, shift); \
|
||||
reg2_m = __msa_srl_w(reg2_m, shift); \
|
||||
reg3_m = __msa_srl_w(reg3_m, shift); \
|
||||
u_out = (v8u16)__msa_pckev_h((v8i16)reg1_m, (v8i16)reg0_m); \
|
||||
v_out = (v8u16)__msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \
|
||||
}
|
||||
|
||||
// Takes ARGB input and calculates U and V.
|
||||
@ -2530,61 +2534,123 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
|
||||
int x;
|
||||
const uint8_t* s = src_rgb0;
|
||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 vec0, vec1, vec2, vec3;
|
||||
v16u8 dst0, dst1;
|
||||
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
|
||||
18, 19, 22, 23, 26, 27, 30, 31};
|
||||
v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
|
||||
v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30};
|
||||
v16u8 const_0x7F = (v16u8)__msa_fill_h(0x7F);
|
||||
v16u8 const_0x6B14 = (v16u8)__msa_fill_h(0x6B14);
|
||||
v16u8 const_0x2B54 = (v16u8)__msa_fill_h(0x2B54);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
v8u16 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v8u16 vec0, vec1, vec2, vec3;
|
||||
v8u16 dst0, dst1, dst2, dst3;
|
||||
v16u8 zero = {0};
|
||||
v8i16 shuffler0 = {0, 3, 4, 7, 8, 11, 12, 15};
|
||||
v8i16 shuffler1 = {1, 2, 5, 6, 9, 10, 13, 14};
|
||||
v8i16 shuffler2 = {2, 3, 6, 7, 10, 11, 14, 15};
|
||||
v8i16 shuffler3 = {0, 1, 4, 5, 8, 9, 12, 13};
|
||||
v8u16 const_0x0000003f = (v8u16)__msa_fill_w(0x0000003f);
|
||||
v4u32 const_0x00008080 = (v8u16)__msa_fill_w(0x00008080);
|
||||
v8u16 const_0x0015002a = (v8u16)__msa_fill_w(0x0015002a);
|
||||
v8u16 const_0x0035000a = (v8u16)__msa_fill_w(0x0035000a);
|
||||
v4i32 shift = __msa_fill_w(0x00000008);
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((void*)s, 0);
|
||||
src1 = (v16u8)__msa_ld_b((void*)s, 16);
|
||||
src2 = (v16u8)__msa_ld_b((void*)s, 32);
|
||||
src3 = (v16u8)__msa_ld_b((void*)s, 48);
|
||||
src4 = (v16u8)__msa_ld_b((void*)t, 0);
|
||||
src5 = (v16u8)__msa_ld_b((void*)t, 16);
|
||||
src6 = (v16u8)__msa_ld_b((void*)t, 32);
|
||||
src7 = (v16u8)__msa_ld_b((void*)t, 48);
|
||||
src0 = __msa_aver_u_b(src0, src4);
|
||||
src1 = __msa_aver_u_b(src1, src5);
|
||||
src2 = __msa_aver_u_b(src2, src6);
|
||||
src3 = __msa_aver_u_b(src3, src7);
|
||||
src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
|
||||
src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2);
|
||||
src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
|
||||
src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2);
|
||||
vec0 = __msa_aver_u_b(src4, src6);
|
||||
vec1 = __msa_aver_u_b(src5, src7);
|
||||
src0 = (v16u8)__msa_ld_b((void*)s, 64);
|
||||
src1 = (v16u8)__msa_ld_b((void*)s, 80);
|
||||
src2 = (v16u8)__msa_ld_b((void*)s, 96);
|
||||
src3 = (v16u8)__msa_ld_b((void*)s, 112);
|
||||
src4 = (v16u8)__msa_ld_b((void*)t, 64);
|
||||
src5 = (v16u8)__msa_ld_b((void*)t, 80);
|
||||
src6 = (v16u8)__msa_ld_b((void*)t, 96);
|
||||
src7 = (v16u8)__msa_ld_b((void*)t, 112);
|
||||
src0 = __msa_aver_u_b(src0, src4);
|
||||
src1 = __msa_aver_u_b(src1, src5);
|
||||
src2 = __msa_aver_u_b(src2, src6);
|
||||
src3 = __msa_aver_u_b(src3, src7);
|
||||
src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
|
||||
src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2);
|
||||
src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
|
||||
src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2);
|
||||
vec2 = __msa_aver_u_b(src4, src6);
|
||||
vec3 = __msa_aver_u_b(src5, src7);
|
||||
ARGBTOUV(vec0, vec1, vec2, vec3, const_0x6B14, const_0x7F, const_0x2B54,
|
||||
const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0,
|
||||
dst1);
|
||||
ST_UB(dst0, dst_v);
|
||||
ST_UB(dst1, dst_u);
|
||||
src1 = __msa_ld_b((void*)s, 0);
|
||||
src3 = __msa_ld_b((void*)s, 16);
|
||||
src5 = __msa_ld_b((void*)t, 0);
|
||||
src7 = __msa_ld_b((void*)t, 16);
|
||||
src0 = __msa_ilvr_b(zero, src1);
|
||||
src1 = __msa_ilvl_b(zero, src1);
|
||||
src2 = __msa_ilvr_b(zero, src3);
|
||||
src3 = __msa_ilvl_b(zero, src3);
|
||||
src4 = __msa_ilvr_b(zero, src5);
|
||||
src5 = __msa_ilvl_b(zero, src5);
|
||||
src6 = __msa_ilvr_b(zero, src7);
|
||||
src7 = __msa_ilvl_b(zero, src7);
|
||||
src0 += src4;
|
||||
src1 += src5;
|
||||
src2 += src6;
|
||||
src3 += src7;
|
||||
src4 = __msa_ilvev_d(src1, src0);
|
||||
src5 = __msa_ilvod_d(src1, src0);
|
||||
src6 = __msa_ilvev_d(src3, src2);
|
||||
src7 = __msa_ilvod_d(src3, src2);
|
||||
vec0 = __msa_aver_u_h(src4, src5);
|
||||
vec1 = __msa_aver_u_h(src6, src7);
|
||||
|
||||
src1 = __msa_ld_b((void*)s, 32);
|
||||
src3 = __msa_ld_b((void*)s, 48);
|
||||
src5 = __msa_ld_b((void*)t, 32);
|
||||
src7 = __msa_ld_b((void*)t, 48);
|
||||
src0 = __msa_ilvr_b(zero, src1);
|
||||
src1 = __msa_ilvl_b(zero, src1);
|
||||
src2 = __msa_ilvr_b(zero, src3);
|
||||
src3 = __msa_ilvl_b(zero, src3);
|
||||
src4 = __msa_ilvr_b(zero, src5);
|
||||
src5 = __msa_ilvl_b(zero, src5);
|
||||
src6 = __msa_ilvr_b(zero, src7);
|
||||
src7 = __msa_ilvl_b(zero, src7);
|
||||
src0 += src4;
|
||||
src1 += src5;
|
||||
src2 += src6;
|
||||
src3 += src7;
|
||||
src4 = __msa_ilvev_d(src1, src0);
|
||||
src5 = __msa_ilvod_d(src1, src0);
|
||||
src6 = __msa_ilvev_d(src3, src2);
|
||||
src7 = __msa_ilvod_d(src3, src2);
|
||||
vec2 = __msa_aver_u_h(src4, src5);
|
||||
vec3 = __msa_aver_u_h(src6, src7);
|
||||
ARGBTOUV(vec0, vec1, vec2, vec3, const_0x0000003f, const_0x00008080,
|
||||
const_0x0015002a, const_0x0035000a, shuffler0, shuffler1,
|
||||
shuffler2, shuffler3, shift, dst0, dst1);
|
||||
|
||||
src1 = __msa_ld_b((void*)s, 64);
|
||||
src3 = __msa_ld_b((void*)s, 80);
|
||||
src5 = __msa_ld_b((void*)t, 64);
|
||||
src7 = __msa_ld_b((void*)t, 80);
|
||||
src0 = __msa_ilvr_b(zero, src1);
|
||||
src1 = __msa_ilvl_b(zero, src1);
|
||||
src2 = __msa_ilvr_b(zero, src3);
|
||||
src3 = __msa_ilvl_b(zero, src3);
|
||||
src4 = __msa_ilvr_b(zero, src5);
|
||||
src5 = __msa_ilvl_b(zero, src5);
|
||||
src6 = __msa_ilvr_b(zero, src7);
|
||||
src7 = __msa_ilvl_b(zero, src7);
|
||||
src0 += src4;
|
||||
src1 += src5;
|
||||
src2 += src6;
|
||||
src3 += src7;
|
||||
src4 = __msa_ilvev_d(src1, src0);
|
||||
src5 = __msa_ilvod_d(src1, src0);
|
||||
src6 = __msa_ilvev_d(src3, src2);
|
||||
src7 = __msa_ilvod_d(src3, src2);
|
||||
vec0 = __msa_aver_u_h(src4, src5);
|
||||
vec1 = __msa_aver_u_h(src6, src7);
|
||||
|
||||
src1 = __msa_ld_b((void*)s, 96);
|
||||
src3 = __msa_ld_b((void*)s, 112);
|
||||
src5 = __msa_ld_b((void*)t, 96);
|
||||
src7 = __msa_ld_b((void*)t, 112);
|
||||
src0 = __msa_ilvr_b(zero, src1);
|
||||
src1 = __msa_ilvl_b(zero, src1);
|
||||
src2 = __msa_ilvr_b(zero, src3);
|
||||
src3 = __msa_ilvl_b(zero, src3);
|
||||
src4 = __msa_ilvr_b(zero, src5);
|
||||
src5 = __msa_ilvl_b(zero, src5);
|
||||
src6 = __msa_ilvr_b(zero, src7);
|
||||
src7 = __msa_ilvl_b(zero, src7);
|
||||
src0 += src4;
|
||||
src1 += src5;
|
||||
src2 += src6;
|
||||
src3 += src7;
|
||||
src4 = __msa_ilvev_d(src1, src0);
|
||||
src5 = __msa_ilvod_d(src1, src0);
|
||||
src6 = __msa_ilvev_d(src3, src2);
|
||||
src7 = __msa_ilvod_d(src3, src2);
|
||||
vec2 = __msa_aver_u_h(src4, src5);
|
||||
vec3 = __msa_aver_u_h(src6, src7);
|
||||
ARGBTOUV(vec0, vec1, vec2, vec3, const_0x0000003f, const_0x00008080,
|
||||
const_0x0015002a, const_0x0035000a, shuffler0, shuffler1,
|
||||
shuffler2, shuffler3, shift, dst2, dst3);
|
||||
|
||||
dst0 = (v8u16)__msa_pckev_b(dst2, dst0);
|
||||
dst1 = (v8u16)__msa_pckev_b(dst3, dst1);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
s += 128;
|
||||
t += 128;
|
||||
dst_v += 16;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user