mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-04-30 19:09:18 +08:00
ARGBToI420 MMI and MSA version match C.
In commit 0b8bb6, the C version was updated. This patch updates the MMI and MSA versions to match the C version. Change-Id: Ib28da3629a8465990c8e2185278a95af8c27a31d Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2227754 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
db63668a24
commit
ce5b333853
@ -1439,7 +1439,7 @@ ANY12S(ARGBToUVJRow_Any_MMI, ARGBToUVJRow_MMI, 0, 4, 15)
|
||||
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOUVROW_MSA
|
||||
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31)
|
||||
ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOUVROW_MMI
|
||||
ANY12S(BGRAToUVRow_Any_MMI, BGRAToUVRow_MMI, 0, 4, 15)
|
||||
@ -1448,7 +1448,7 @@ ANY12S(BGRAToUVRow_Any_MMI, BGRAToUVRow_MMI, 0, 4, 15)
|
||||
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOUVROW_MSA
|
||||
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31)
|
||||
ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOUVROW_MMI
|
||||
ANY12S(ABGRToUVRow_Any_MMI, ABGRToUVRow_MMI, 0, 4, 15)
|
||||
@ -1457,7 +1457,7 @@ ANY12S(ABGRToUVRow_Any_MMI, ABGRToUVRow_MMI, 0, 4, 15)
|
||||
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOUVROW_MSA
|
||||
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31)
|
||||
ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOUVROW_MMI
|
||||
ANY12S(RGBAToUVRow_Any_MMI, RGBAToUVRow_MMI, 0, 4, 15)
|
||||
|
||||
@ -690,12 +690,15 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
uint64_t ftmp[13];
|
||||
uint64_t tmp[1];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x0026004a00700002;
|
||||
const uint64_t mask_v = 0x00020070005e0012;
|
||||
const uint64_t mask_u = 0x0013002500380002;
|
||||
const uint64_t mask_v = 0x00020038002f0009;
|
||||
|
||||
__asm__ volatile(
|
||||
"dli %[tmp0], 0x0001000100010001 \n\t"
|
||||
"dmtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"1: \n\t"
|
||||
"daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
|
||||
"gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
|
||||
@ -709,7 +712,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest0_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
|
||||
@ -727,7 +731,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -754,7 +759,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest1_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
|
||||
@ -772,7 +778,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -799,7 +806,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest2_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
|
||||
@ -817,7 +825,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -844,7 +853,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest3_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
|
||||
@ -862,7 +872,8 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -900,11 +911,12 @@ void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
|
||||
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
|
||||
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
|
||||
[ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
@ -994,12 +1006,15 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
uint64_t ftmp[13];
|
||||
uint64_t tmp[1];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x00020070004a0026;
|
||||
const uint64_t mask_v = 0x0012005e00700002;
|
||||
const uint64_t mask_u = 0x0002003800250013;
|
||||
const uint64_t mask_v = 0x0009002f00380002;
|
||||
|
||||
__asm__ volatile(
|
||||
"dli %[tmp0], 0x0001000100010001 \n\t"
|
||||
"dmtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"1: \n\t"
|
||||
"daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
|
||||
"gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
|
||||
@ -1013,7 +1028,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
|
||||
"pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
|
||||
@ -1031,7 +1047,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -1058,7 +1075,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
|
||||
"pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
|
||||
@ -1076,7 +1094,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -1103,7 +1122,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
|
||||
"pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
|
||||
@ -1121,7 +1141,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -1148,7 +1169,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
|
||||
"pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
|
||||
@ -1166,7 +1188,8 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsrl %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -1204,11 +1227,12 @@ void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
|
||||
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
|
||||
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
|
||||
[ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
@ -1298,12 +1322,15 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
uint64_t ftmp[13];
|
||||
uint64_t tmp[1];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x00020070004a0026;
|
||||
const uint64_t mask_v = 0x0012005e00700002;
|
||||
const uint64_t mask_u = 0x0002003800250013;
|
||||
const uint64_t mask_v = 0x0009002F00380002;
|
||||
|
||||
__asm__ volatile(
|
||||
"dli %[tmp0], 0x0001000100010001 \n\t"
|
||||
"dmtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"1: \n\t"
|
||||
"daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
|
||||
"gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
|
||||
@ -1317,7 +1344,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest0_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
|
||||
@ -1335,7 +1363,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1362,7 +1391,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest1_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
|
||||
@ -1380,7 +1410,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1407,7 +1438,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest2_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
|
||||
@ -1425,7 +1457,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1452,7 +1485,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest3_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
|
||||
@ -1470,7 +1504,8 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1508,11 +1543,12 @@ void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
|
||||
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
|
||||
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
|
||||
[ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
@ -1602,12 +1638,15 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
uint64_t ftmp[13];
|
||||
uint64_t tmp[1];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x0026004a00700002;
|
||||
const uint64_t mask_v = 0x00020070005e0012;
|
||||
const uint64_t mask_u = 0x0013002500380002;
|
||||
const uint64_t mask_v = 0x00020038002f0009;
|
||||
|
||||
__asm__ volatile(
|
||||
"dli %[tmp0], 0x0001000100010001 \n\t"
|
||||
"dmtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"1: \n\t"
|
||||
"daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
|
||||
"gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
|
||||
@ -1621,7 +1660,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
|
||||
"dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
|
||||
@ -1639,7 +1679,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1666,7 +1707,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
|
||||
"dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
|
||||
@ -1684,7 +1726,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1711,7 +1754,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
|
||||
"dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
|
||||
@ -1729,7 +1773,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1756,7 +1801,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
|
||||
"dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
|
||||
@ -1774,7 +1820,8 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"paddh %[src0], %[src0], %[src_lo] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsrl %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -1812,11 +1859,12 @@ void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
|
||||
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
|
||||
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
|
||||
[ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
@ -1910,12 +1958,15 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
uint64_t ftmp[13];
|
||||
uint64_t tmp[1];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x0026004a00700002;
|
||||
const uint64_t mask_v = 0x00020070005e0012;
|
||||
const uint64_t mask_u = 0x0013002500380002;
|
||||
const uint64_t mask_v = 0x00020038002f0009;
|
||||
|
||||
__asm__ volatile(
|
||||
"dli %[tmp0], 0x0001000100010001 \n\t"
|
||||
"dmtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"1: \n\t"
|
||||
"daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
|
||||
"gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
|
||||
@ -1931,7 +1982,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest0_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
|
||||
@ -1951,7 +2003,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -1980,7 +2033,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest1_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
|
||||
@ -2000,7 +2054,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -2029,7 +2084,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest2_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
|
||||
@ -2049,7 +2105,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -2078,7 +2135,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[dest3_u], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
|
||||
"pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
|
||||
@ -2098,7 +2156,8 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"dsll %[src_lo], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
|
||||
"pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
|
||||
@ -2136,11 +2195,12 @@ void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
|
||||
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
|
||||
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
|
||||
[ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
@ -2234,12 +2294,15 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
uint64_t src_rgb1;
|
||||
uint64_t ftmp[12];
|
||||
uint64_t ftmp[13];
|
||||
uint64_t tmp[1];
|
||||
const uint64_t value = 0x4040;
|
||||
const uint64_t mask_u = 0x00020070004a0026;
|
||||
const uint64_t mask_v = 0x0012005e00700002;
|
||||
const uint64_t mask_u = 0x0002003800250013;
|
||||
const uint64_t mask_v = 0x0009002f00380002;
|
||||
|
||||
__asm__ volatile(
|
||||
"dli %[tmp0], 0x0001000100010001 \n\t"
|
||||
"dmtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"1: \n\t"
|
||||
"daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
|
||||
"gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
|
||||
@ -2255,7 +2318,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest0_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
|
||||
@ -2275,7 +2339,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -2304,7 +2369,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest1_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
|
||||
@ -2324,7 +2390,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -2353,7 +2420,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest2_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
|
||||
@ -2373,7 +2441,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -2402,7 +2471,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
|
||||
"dsll %[dest3_v], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
|
||||
@ -2422,7 +2492,8 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
"dsll %[src1], %[src1], %[eight] \n\t"
|
||||
"punpckhbh %[src_hi], %[src1], %[zero] \n\t"
|
||||
"paddh %[src0], %[src0], %[src_hi] \n\t"
|
||||
"psrlh %[src0], %[src0], %[two] \n\t"
|
||||
"paddh %[src0], %[src0], %[ftmp12] \n\t"
|
||||
"psrlh %[src0], %[src0], %[one] \n\t"
|
||||
"pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
|
||||
"dsll %[src_hi], %[src0], %[sixteen] \n\t"
|
||||
"pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
|
||||
@ -2460,11 +2531,12 @@ void RAWToUVRow_MMI(const uint8_t* src_rgb0,
|
||||
[dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
|
||||
[dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
|
||||
[dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
|
||||
[dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
|
||||
[ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
@ -2764,7 +2836,7 @@ void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
|
||||
: [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
|
||||
[dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
|
||||
[mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
|
||||
[zero] "f"(0x00), [eight] "f"(0x08),
|
||||
[sixteen] "f"(0x10)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
@ -155,11 +155,10 @@ extern "C" {
|
||||
}
|
||||
|
||||
// Loads current and next row of ARGB input and averages it to calculate U and V
|
||||
#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3) \
|
||||
#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3, const_0x0101) \
|
||||
{ \
|
||||
v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \
|
||||
v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
|
||||
v16u8 vec8_m, vec9_m; \
|
||||
v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \
|
||||
v8u16 reg8_m, reg9_m; \
|
||||
\
|
||||
@ -195,53 +194,16 @@ extern "C" {
|
||||
reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
|
||||
reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
|
||||
reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
|
||||
reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
|
||||
reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
|
||||
reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
|
||||
reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
|
||||
argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
|
||||
argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
|
||||
src0_m = (v16u8)__msa_ld_b((void*)s, 64); \
|
||||
src1_m = (v16u8)__msa_ld_b((void*)s, 80); \
|
||||
src2_m = (v16u8)__msa_ld_b((void*)s, 96); \
|
||||
src3_m = (v16u8)__msa_ld_b((void*)s, 112); \
|
||||
src4_m = (v16u8)__msa_ld_b((void*)t, 64); \
|
||||
src5_m = (v16u8)__msa_ld_b((void*)t, 80); \
|
||||
src6_m = (v16u8)__msa_ld_b((void*)t, 96); \
|
||||
src7_m = (v16u8)__msa_ld_b((void*)t, 112); \
|
||||
vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
|
||||
vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
|
||||
vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
|
||||
vec5_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
|
||||
vec6_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
|
||||
vec7_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
|
||||
vec8_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
|
||||
vec9_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
|
||||
reg0_m = __msa_hadd_u_h(vec2_m, vec2_m); \
|
||||
reg1_m = __msa_hadd_u_h(vec3_m, vec3_m); \
|
||||
reg2_m = __msa_hadd_u_h(vec4_m, vec4_m); \
|
||||
reg3_m = __msa_hadd_u_h(vec5_m, vec5_m); \
|
||||
reg4_m = __msa_hadd_u_h(vec6_m, vec6_m); \
|
||||
reg5_m = __msa_hadd_u_h(vec7_m, vec7_m); \
|
||||
reg6_m = __msa_hadd_u_h(vec8_m, vec8_m); \
|
||||
reg7_m = __msa_hadd_u_h(vec9_m, vec9_m); \
|
||||
reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
|
||||
reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
|
||||
reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
|
||||
reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
|
||||
reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
|
||||
reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
|
||||
reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
|
||||
reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
|
||||
reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
|
||||
reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
|
||||
reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
|
||||
reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
|
||||
argb2 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
|
||||
argb3 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
|
||||
reg8_m += const_0x0101; \
|
||||
reg9_m += const_0x0101; \
|
||||
reg0_m += const_0x0101; \
|
||||
reg1_m += const_0x0101; \
|
||||
argb0 = (v8u16)__msa_srai_h((v8i16)reg8_m, 1); \
|
||||
argb1 = (v8u16)__msa_srai_h((v8i16)reg9_m, 1); \
|
||||
argb2 = (v8u16)__msa_srai_h((v8i16)reg0_m, 1); \
|
||||
argb3 = (v8u16)__msa_srai_h((v8i16)reg1_m, 1); \
|
||||
}
|
||||
|
||||
// Takes ARGB input and calculates U and V.
|
||||
#define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \
|
||||
shf0, shf1, shf2, shf3, v_out, u_out) \
|
||||
{ \
|
||||
@ -272,6 +234,39 @@ extern "C" {
|
||||
u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \
|
||||
}
|
||||
|
||||
// Takes ARGB input and calculates U and V.
// Halfword variant of ARGBTOUV: argb0..argb3 are shuffled with __msa_vshf_h,
// i.e. treated as 16-bit elements (the callers visible in this file pass the
// v8u16 vectors produced by READ_ARGB).
//   v_out <- dotp(shf0 lanes, const1) + const3 - dotp(shf1 lanes, const0)
//   u_out <- dotp(shf2 lanes, const1) + const3 - dotp(shf3 lanes, const2)
// Each 32-bit result is narrowed with pckev_h and then pckod_b, which keeps
// one byte per halfword (presumably the high byte, i.e. an implicit >> 8 on
// little-endian targets - TODO confirm).
// Note the output order in the parameter list: v_out comes before u_out.
// NOTE(review): the final pckod_b packs each vector with itself, so it looks
// like only the low 8 bytes of u_out / v_out carry distinct results; the
// visible callers store just __msa_copy_u_d(..., 0).
|
||||
#define ARGBTOUV_H(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \
|
||||
shf0, shf1, shf2, shf3, v_out, u_out) \
|
||||
{ \
|
||||
v8u16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
|
||||
v4u32 reg0_m, reg1_m, reg2_m, reg3_m; \
|
||||
\
|
||||
vec0_m = __msa_vshf_h(shf0, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec1_m = __msa_vshf_h(shf0, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec2_m = __msa_vshf_h(shf1, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec3_m = __msa_vshf_h(shf1, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec4_m = __msa_vshf_h(shf2, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec5_m = __msa_vshf_h(shf2, (v16i8)argb3, (v16i8)argb2); \
|
||||
vec6_m = __msa_vshf_h(shf3, (v16i8)argb1, (v16i8)argb0); \
|
||||
vec7_m = __msa_vshf_h(shf3, (v16i8)argb3, (v16i8)argb2); \
|
||||
reg0_m = __msa_dotp_u_w(vec0_m, const1); \
|
||||
reg1_m = __msa_dotp_u_w(vec1_m, const1); \
|
||||
reg2_m = __msa_dotp_u_w(vec4_m, const1); \
|
||||
reg3_m = __msa_dotp_u_w(vec5_m, const1); \
|
||||
reg0_m += (v4u32)const3; \
|
||||
reg1_m += (v4u32)const3; \
|
||||
reg2_m += (v4u32)const3; \
|
||||
reg3_m += (v4u32)const3; \
|
||||
reg0_m -= __msa_dotp_u_w(vec2_m, const0); \
|
||||
reg1_m -= __msa_dotp_u_w(vec3_m, const0); \
|
||||
reg2_m -= __msa_dotp_u_w(vec6_m, const2); \
|
||||
reg3_m -= __msa_dotp_u_w(vec7_m, const2); \
|
||||
u_out = (v16u8)__msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \
|
||||
v_out = (v16u8)__msa_pckev_h((v8i16)reg1_m, (v8i16)reg0_m); \
|
||||
u_out = (v16u8)__msa_pckod_b((v16i8)u_out, (v16i8)u_out); \
|
||||
v_out = (v16u8)__msa_pckod_b((v16i8)v_out, (v16i8)v_out); \
|
||||
}
|
||||
|
||||
// Load I444 pixel data
|
||||
#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
|
||||
{ \
|
||||
@ -839,12 +834,13 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
|
||||
v16u8 dst0, dst1;
|
||||
v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70);
|
||||
v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A);
|
||||
v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26);
|
||||
v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E);
|
||||
v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12);
|
||||
v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x38);
|
||||
v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x25);
|
||||
v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x13);
|
||||
v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x2f);
|
||||
v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x09);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
|
||||
@ -903,12 +899,18 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb0,
|
||||
reg3 += __msa_hadd_u_h(vec5, vec5);
|
||||
reg4 += __msa_hadd_u_h(vec0, vec0);
|
||||
reg5 += __msa_hadd_u_h(vec1, vec1);
|
||||
reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 2);
|
||||
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 2);
|
||||
reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 2);
|
||||
reg3 = (v8u16)__msa_srai_h((v8i16)reg3, 2);
|
||||
reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 2);
|
||||
reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 2);
|
||||
reg0 += const_0x0001;
|
||||
reg1 += const_0x0001;
|
||||
reg2 += const_0x0001;
|
||||
reg3 += const_0x0001;
|
||||
reg4 += const_0x0001;
|
||||
reg5 += const_0x0001;
|
||||
reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 1);
|
||||
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 1);
|
||||
reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 1);
|
||||
reg3 = (v8u16)__msa_srai_h((v8i16)reg3, 1);
|
||||
reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 1);
|
||||
reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 1);
|
||||
reg6 = reg0 * const_0x70;
|
||||
reg7 = reg1 * const_0x70;
|
||||
reg8 = reg2 * const_0x4A;
|
||||
@ -2045,12 +2047,13 @@ void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||
v8i16 reg0, reg1, reg2, reg3;
|
||||
v16u8 dst0;
|
||||
v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70);
|
||||
v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A);
|
||||
v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26);
|
||||
v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E);
|
||||
v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12);
|
||||
v8u16 const_0x70 = (v8u16)__msa_fill_h(0x38);
|
||||
v8u16 const_0x4A = (v8u16)__msa_fill_h(0x25);
|
||||
v8u16 const_0x26 = (v8u16)__msa_fill_h(0x13);
|
||||
v8u16 const_0x5E = (v8u16)__msa_fill_h(0x2f);
|
||||
v8u16 const_0x12 = (v8u16)__msa_fill_h(0x09);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||
v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
|
||||
v16i8 zero = {0};
|
||||
|
||||
@ -2099,10 +2102,14 @@ void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2);
|
||||
reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4);
|
||||
reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6);
|
||||
reg0 = __msa_srai_h((v8i16)reg0, 2);
|
||||
reg1 = __msa_srai_h((v8i16)reg1, 2);
|
||||
reg2 = __msa_srai_h((v8i16)reg2, 2);
|
||||
reg3 = __msa_srai_h((v8i16)reg3, 2);
|
||||
reg0 += const_0x0001;
|
||||
reg1 += const_0x0001;
|
||||
reg2 += const_0x0001;
|
||||
reg3 += const_0x0001;
|
||||
reg0 = __msa_srai_h((v8i16)reg0, 1);
|
||||
reg1 = __msa_srai_h((v8i16)reg1, 1);
|
||||
reg2 = __msa_srai_h((v8i16)reg2, 1);
|
||||
reg3 = __msa_srai_h((v8i16)reg3, 1);
|
||||
vec4 = (v8u16)__msa_pckev_h(reg1, reg0);
|
||||
vec5 = (v8u16)__msa_pckev_h(reg3, reg2);
|
||||
vec6 = (v8u16)__msa_pckod_h(reg1, reg0);
|
||||
@ -2150,12 +2157,13 @@ void RAWToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||
v8i16 reg0, reg1, reg2, reg3;
|
||||
v16u8 dst0;
|
||||
v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70);
|
||||
v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A);
|
||||
v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26);
|
||||
v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E);
|
||||
v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12);
|
||||
v8u16 const_0x70 = (v8u16)__msa_fill_h(0x38);
|
||||
v8u16 const_0x4A = (v8u16)__msa_fill_h(0x25);
|
||||
v8u16 const_0x26 = (v8u16)__msa_fill_h(0x13);
|
||||
v8u16 const_0x5E = (v8u16)__msa_fill_h(0x2f);
|
||||
v8u16 const_0x12 = (v8u16)__msa_fill_h(0x09);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||
v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
|
||||
v16i8 zero = {0};
|
||||
|
||||
@ -2204,10 +2212,14 @@ void RAWToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2);
|
||||
reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4);
|
||||
reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6);
|
||||
reg0 = __msa_srai_h(reg0, 2);
|
||||
reg1 = __msa_srai_h(reg1, 2);
|
||||
reg2 = __msa_srai_h(reg2, 2);
|
||||
reg3 = __msa_srai_h(reg3, 2);
|
||||
reg0 += const_0x0001;
|
||||
reg1 += const_0x0001;
|
||||
reg2 += const_0x0001;
|
||||
reg3 += const_0x0001;
|
||||
reg0 = __msa_srai_h(reg0, 1);
|
||||
reg1 = __msa_srai_h(reg1, 1);
|
||||
reg2 = __msa_srai_h(reg2, 1);
|
||||
reg3 = __msa_srai_h(reg3, 1);
|
||||
vec4 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
|
||||
vec5 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
|
||||
vec6 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0);
|
||||
@ -2588,28 +2600,30 @@ void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
int x;
|
||||
const uint8_t* s = src_rgb0;
|
||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
||||
v16u8 dst0, dst1, vec0, vec1, vec2, vec3;
|
||||
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
|
||||
18, 19, 22, 23, 26, 27, 30, 31};
|
||||
v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
|
||||
v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29};
|
||||
v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E);
|
||||
v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000);
|
||||
v16u8 const_0x264A = (v16u8)__msa_fill_h(0x264A);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
const uint8_t unused = 0xf;
|
||||
v8u16 src0, src1, src2, src3;
|
||||
v16u8 dst0, dst1;
|
||||
v8i16 shuffler0 = {1, unused, 5, unused, 9, unused, 13, unused};
|
||||
v8i16 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15};
|
||||
v8i16 shuffler2 = {3, unused, 7, unused, 11, unused, 15, unused};
|
||||
v8i16 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14};
|
||||
v8u16 const_0x09002f = (v8u16)__msa_fill_w(0x09002f);
|
||||
v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038);
|
||||
v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013);
|
||||
v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080);
|
||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
READ_ARGB(s, t, vec0, vec1, vec2, vec3);
|
||||
ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A,
|
||||
const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0,
|
||||
dst1);
|
||||
ST_UB(dst0, dst_v);
|
||||
ST_UB(dst1, dst_u);
|
||||
s += 128;
|
||||
t += 128;
|
||||
dst_v += 16;
|
||||
dst_u += 16;
|
||||
for (x = 0; x < width; x += 16) {
|
||||
READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001);
|
||||
ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038,
|
||||
const_0x250013, const_0x008080, shuffler0, shuffler1,
|
||||
shuffler2, shuffler3, dst0, dst1);
|
||||
*((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0);
|
||||
*((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0);
|
||||
s += 64;
|
||||
t += 64;
|
||||
dst_u += 8;
|
||||
dst_v += 8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2621,29 +2635,30 @@ void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
int x;
|
||||
const uint8_t* s = src_rgb0;
|
||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
const uint8_t unused = 0xf;
|
||||
v8u16 src0, src1, src2, src3;
|
||||
v16u8 dst0, dst1;
|
||||
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
|
||||
18, 19, 22, 23, 26, 27, 30, 31};
|
||||
v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
|
||||
v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30};
|
||||
v16u8 const_0x4A26 = (v16u8)__msa_fill_h(0x4A26);
|
||||
v16u8 const_0x0070 = (v16u8)__msa_fill_h(0x0070);
|
||||
v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
v8i16 shuffler0 = {0, unused, 4, unused, 8, unused, 12, unused};
|
||||
v8i16 shuffler1 = {1, 2, 5, 6, 9, 10, 13, 14};
|
||||
v8i16 shuffler2 = {2, unused, 6, unused, 10, unused, 14, unused};
|
||||
v8i16 shuffler3 = {0, 1, 4, 5, 8, 9, 12, 13};
|
||||
v8u16 const_0x09002f = (v8u16)__msa_fill_w(0x09002f);
|
||||
v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038);
|
||||
v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013);
|
||||
v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080);
|
||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
READ_ARGB(s, t, src0, src1, src2, src3);
|
||||
ARGBTOUV(src0, src1, src2, src3, const_0x4A26, const_0x0070, const_0x125E,
|
||||
const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0,
|
||||
dst1);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
s += 128;
|
||||
t += 128;
|
||||
dst_u += 16;
|
||||
dst_v += 16;
|
||||
for (x = 0; x < width; x += 16) {
|
||||
READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001);
|
||||
ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038,
|
||||
const_0x250013, const_0x008080, shuffler0, shuffler1,
|
||||
shuffler2, shuffler3, dst0, dst1);
|
||||
*((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0);
|
||||
*((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0);
|
||||
s += 64;
|
||||
t += 64;
|
||||
dst_u += 8;
|
||||
dst_v += 8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2655,28 +2670,30 @@ void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
|
||||
int x;
|
||||
const uint8_t* s = src_rgb0;
|
||||
const uint8_t* t = src_rgb0 + src_stride_rgb;
|
||||
v16u8 dst0, dst1, vec0, vec1, vec2, vec3;
|
||||
v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
|
||||
18, 19, 22, 23, 26, 27, 30, 31};
|
||||
v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
|
||||
v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29};
|
||||
v16u8 const_0x125E = (v16u8)__msa_fill_h(0x264A);
|
||||
v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000);
|
||||
v16u8 const_0x264A = (v16u8)__msa_fill_h(0x125E);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
const uint8_t unused = 0xf;
|
||||
v8u16 src0, src1, src2, src3;
|
||||
v16u8 dst0, dst1;
|
||||
v8i16 shuffler0 = {3, unused, 7, unused, 11, unused, 15, unused};
|
||||
v8i16 shuffler1 = {2, 1, 6, 5, 10, 9, 14, 13};
|
||||
v8i16 shuffler2 = {1, unused, 5, unused, 9, unused, 13, unused};
|
||||
v8i16 shuffler3 = {3, 2, 7, 6, 11, 10, 15, 14};
|
||||
v8u16 const_0x09002f = (v8u16)__msa_fill_w(0x09002f);
|
||||
v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038);
|
||||
v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013);
|
||||
v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080);
|
||||
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
READ_ARGB(s, t, vec0, vec1, vec2, vec3);
|
||||
ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A,
|
||||
const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0,
|
||||
dst1);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
s += 128;
|
||||
t += 128;
|
||||
dst_u += 16;
|
||||
dst_v += 16;
|
||||
for (x = 0; x < width; x += 16) {
|
||||
READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001);
|
||||
ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038,
|
||||
const_0x250013, const_0x008080, shuffler0, shuffler1,
|
||||
shuffler2, shuffler3, dst0, dst1);
|
||||
*((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0);
|
||||
*((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0);
|
||||
s += 64;
|
||||
t += 64;
|
||||
dst_u += 8;
|
||||
dst_v += 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user