diff --git a/README.chromium b/README.chromium index 1fd20c095..880191e4a 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1870 +Version: 1871 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 93f56ed4a..88619a4f6 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -418,7 +418,6 @@ int I412ToI420(const uint16_t* src_y, int width, int height); - #define I412ToI012 I410ToI010 #define H410ToH010 I410ToI010 #define H412ToH012 I410ToI010 diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 3590f125a..5b244d77e 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -3378,15 +3378,15 @@ void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -3414,7 +3414,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, int width); void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -3427,15 +3427,15 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, int width); void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width); void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 20701e07d..b6623dbbe 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1870 +#define LIBYUV_VERSION 1871 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/compare_mmi.cc b/source/compare_mmi.cc deleted file mode 100644 index 7640d9468..000000000 --- a/source/compare_mmi.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" - -#include "libyuv/compare_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -// Hakmem method for hamming distance. -uint32_t HammingDistance_MMI(const uint8_t* src_a, - const uint8_t* src_b, - int count) { - uint32_t diff = 0u; - - uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0; - uint64_t c1 = 0x5555555555555555; - uint64_t c2 = 0x3333333333333333; - uint64_t c3 = 0x0f0f0f0f0f0f0f0f; - uint32_t c4 = 0x01010101; - uint64_t s1 = 1, s2 = 2, s3 = 4; - __asm__ volatile( - "1: \n\t" - "ldc1 %[ta], 0(%[src_a]) \n\t" - "ldc1 %[tb], 0(%[src_b]) \n\t" - "xor %[temp], %[ta], %[tb] \n\t" - "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1 - "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1 - "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1 - "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2) - "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2 - "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2 - "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t - "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4 - "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4) - "and %[temp1], %[temp1], %[c3] \n\t" //&c3 - "dmfc1 $t0, %[temp1] \n\t" - "dsrl32 $t0, $t0, 0 \n\t " - "mul $t0, $t0, %[c4] \n\t" - "dsrl $t0, $t0, 24 \n\t" - "dadd %[diff], %[diff], $t0 \n\t" - "dmfc1 $t0, %[temp1] \n\t" - "mul $t0, $t0, %[c4] \n\t" - "dsrl $t0, $t0, 24 \n\t" - "dadd %[diff], %[diff], $t0 \n\t" - "daddiu %[src_a], %[src_a], 8 \n\t" - "daddiu %[src_b], %[src_b], 8 \n\t" - "addiu %[count], %[count], -8 \n\t" - "bgtz %[count], 1b \n\t" - "nop \n\t" - : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b), - [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp), - [temp1] "+f"(temp1) - : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1), - [s2] "f"(s2), [s3] "f"(s3) - : "memory"); - return diff; -} - -uint32_t SumSquareError_MMI(const uint8_t* src_a, - const uint8_t* src_b, - int count) { - uint32_t sse = 0u; - uint32_t sse_hi = 0u, sse_lo = 0u; - - uint64_t src1, src2; - uint64_t diff, diff_hi, diff_lo; - uint64_t sse_sum, sse_tmp; - - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t" - - "1: \n\t" - "ldc1 %[src1], 0x00(%[src_a]) \n\t" - "ldc1 %[src2], 0x00(%[src_b]) \n\t" - "pasubub %[diff], %[src1], %[src2] \n\t" - "punpcklbh %[diff_lo], %[diff], %[mask] \n\t" - "punpckhbh %[diff_hi], %[diff], %[mask] \n\t" - "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t" - "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t" - "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t" - "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t" - - "daddiu %[src_a], %[src_a], 0x08 \n\t" - "daddiu %[src_b], %[src_b], 0x08 \n\t" - "daddiu %[count], %[count], -0x08 \n\t" - "bnez %[count], 1b \n\t" - - "mfc1 %[sse_lo], %[sse_sum] \n\t" - "mfhc1 %[sse_hi], %[sse_sum] \n\t" - "daddu %[sse], %[sse_hi], %[sse_lo] \n\t" - : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1), - [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi), - [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp), - [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo) - : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count), - [mask] "f"(mask) - : "memory"); - - return sse; -} - -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/source/convert.cc b/source/convert.cc index dd4d9794d..b11ab1bff 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -2010,7 +2010,8 @@ int ARGBToI420Alpha(const uint8_t* src_argb, ARGBToYRow_C; void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, int width) = ARGBExtractAlphaRow_C; - if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 || height == 0) { + if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -2133,7 +2134,8 @@ int ARGBToI420Alpha(const uint8_t* src_argb, ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); ARGBExtractAlphaRow(src_argb, dst_a, width); - ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a, width); + ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a, + width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; diff --git a/source/convert_argb.cc b/source/convert_argb.cc index f16c368d0..cc6560de6 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -5645,7 +5645,7 @@ int I420ToRGB565Dither(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, int width) = + uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index b192a3b77..c3d037c47 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -1741,7 +1741,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, int height) { int y; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, int width) = + uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; diff --git a/source/rotate_mmi.cc b/source/rotate_mmi.cc deleted file mode 100644 index f8de60834..000000000 --- a/source/rotate_mmi.cc +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/rotate_row.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -void TransposeWx8_MMI(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride, - int width) { - uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13; - uint8_t* src_tmp = nullptr; - - __asm__ volatile( - "1: \n\t" - "ldc1 %[tmp12], 0x00(%[src]) \n\t" - "dadd %[src_tmp], %[src], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (00 10 01 11 02 12 03 13) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (04 14 05 15 06 16 07 17) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (20 30 21 31 22 32 23 33) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (24 34 25 35 26 36 27 37) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp4 = (00 10 20 30 01 11 21 31) */ - "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t" - /* tmp5 = (02 12 22 32 03 13 23 33) */ - "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t" - /* tmp6 = (04 14 24 34 05 15 25 35) */ - "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t" - /* tmp7 = (06 16 26 36 07 17 27 37) */ - "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (40 50 41 51 42 52 43 53) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (44 54 45 55 46 56 47 57) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (60 70 61 71 62 72 63 73) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (64 74 65 75 66 76 67 77) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp8 = (40 50 60 70 41 51 61 71) */ - "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t" - /* tmp9 = (42 52 62 72 43 53 63 73) */ - "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t" - /* tmp10 = (44 54 64 74 45 55 65 75) */ - "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t" - /* tmp11 = (46 56 66 76 47 57 67 77) */ - "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t" - - /* tmp0 = (00 10 20 30 40 50 60 70) */ - "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t" - /* tmp1 = (01 11 21 31 41 51 61 71) */ - "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - /* tmp0 = (02 12 22 32 42 52 62 72) */ - "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t" - /* tmp1 = (03 13 23 33 43 53 63 73) */ - "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - /* tmp0 = (04 14 24 34 44 54 64 74) */ - "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t" - /* tmp1 = (05 15 25 35 45 55 65 75) */ - "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - /* tmp0 = (06 16 26 36 46 56 66 76) */ - "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t" - /* tmp1 = (07 17 27 37 47 57 67 77) */ - "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t" - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t" - - "dadd %[dst], %[dst], %[dst_stride] \n\t" - "daddi %[src], %[src], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - - : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), - [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5), - [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8), - [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11), - [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst), - [src_tmp] "+&r"(src_tmp) - : [src] "r"(src), [width] "r"(width), [src_stride] "r"(src_stride), - [dst_stride] "r"(dst_stride) - : "memory"); -} - -void TransposeUVWx8_MMI(const uint8_t* src, - int src_stride, - uint8_t* dst_a, - int dst_stride_a, - uint8_t* dst_b, - int dst_stride_b, - int width) { - uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13; - uint8_t* src_tmp = nullptr; - - __asm__ volatile( - "1: \n\t" - /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */ - "ldc1 %[tmp12], 0x00(%[src]) \n\t" - "dadd %[src_tmp], %[src], %[src_stride] \n\t" - /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */ - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */ - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */ - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */ - "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t" - /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */ - "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t" - /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */ - "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t" - /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */ - "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */ - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */ - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */ - "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t" - /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */ - "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t" - - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */ - "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t" - /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */ - "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t" - "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t" - - /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */ - "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t" - /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */ - "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t" - - /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */ - "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t" - /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */ - "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t" - /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */ - "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t" - /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */ - "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t" - - /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */ - "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t" - /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */ - "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */ - "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t" - /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */ - "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t" - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */ - "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t" - /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */ - "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t" - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */ - "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t" - /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */ - "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t" - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t" - "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t" - "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t" - - "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t" - "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t" - "daddiu %[src], %[src], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), - [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5), - [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8), - [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11), - [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a), - [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp) - : [src] "r"(src), [width] "r"(width), [dst_stride_a] "r"(dst_stride_a), - [dst_stride_b] "r"(dst_stride_b), [src_stride] "r"(src_stride) - : "memory"); -} - -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/source/row_common.cc b/source/row_common.cc index b7655b7b1..8be37fb58 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -342,7 +342,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { // or the upper byte for big endian. void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { int x; for (x = 0; x < width - 1; x += 2) { diff --git a/source/row_gcc.cc b/source/row_gcc.cc index aa4c0d11e..e94fd04df 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -738,7 +738,7 @@ void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "movd %3,%%xmm6 \n" @@ -786,7 +786,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "vbroadcastss %3,%%xmm6 \n" diff --git a/source/row_lasx.cc b/source/row_lasx.cc index 29ac9254d..1082ad80b 100644 --- a/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -1182,7 +1182,7 @@ void ARGBAttenuateRow_LASX(const uint8_t* src_argb, void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { int x; int len = width / 16; diff --git a/source/row_lsx.cc b/source/row_lsx.cc index 573fc94de..e626072a9 100644 --- a/source/row_lsx.cc +++ b/source/row_lsx.cc @@ -794,10 +794,10 @@ void ARGBToUVRow_LSX(const uint8_t* src_argb0, __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009}; __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, - src_argb0, 48, src0, src1, src2, src3); - DUP4_ARG2(__lsx_vld, src_argb1, 0, src_argb1, 16, src_argb1, 32, - src_argb1, 48, src4, src5, src6, src7); + DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0, + 48, src0, src1, src2, src3); + DUP4_ARG2(__lsx_vld, src_argb1, 0, src_argb1, 16, src_argb1, 32, src_argb1, + 48, src4, src5, src6, src7); vec0 = __lsx_vaddwev_h_bu(src0, src4); vec1 = __lsx_vaddwev_h_bu(src1, src5); vec2 = __lsx_vaddwev_h_bu(src2, src6); @@ -846,8 +846,8 @@ void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __m128i tmp0, tmp1, tmp2, tmp3; __m128i shuf = {0x0908060504020100, 0x000000000E0D0C0A}; for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, - 48, src0, src1, src2, src3); + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); tmp0 = __lsx_vshuf_b(src0, src0, shuf); tmp1 = __lsx_vshuf_b(src1, src1, shuf); tmp2 = __lsx_vshuf_b(src2, src2, shuf); @@ -879,8 +879,8 @@ void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __m128i tmp0, tmp1, tmp2, tmp3; __m128i shuf = {0x090A040506000102, 0x000000000C0D0E08}; for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, - 48, src0, src1, src2, src3); + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); tmp0 = __lsx_vshuf_b(src0, src0, shuf); tmp1 = __lsx_vshuf_b(src1, src1, shuf); tmp2 = __lsx_vshuf_b(src2, src2, shuf); @@ -905,9 +905,7 @@ void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __lsx_vst(tmp3, dst_rgb, 0); } -void ARGBToRGB565Row_LSX(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { +void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; int len = width / 8; __m128i zero = __lsx_vldi(0); @@ -995,8 +993,8 @@ void ARGBToUV444Row_LSX(const uint8_t* src_argb, __m128i const_18 = __lsx_vldi(18); __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { - DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, - 48, src0, src1, src2, src3); + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, + src0, src1, src2, src3); tmp0 = __lsx_vpickev_h(src1, src0); tmp1 = __lsx_vpickod_h(src1, src0); tmp2 = __lsx_vpickev_h(src3, src2); @@ -1138,7 +1136,7 @@ void ARGBAttenuateRow_LSX(const uint8_t* src_argb, void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { int x; int len = width / 8; diff --git a/source/row_mmi.cc b/source/row_mmi.cc deleted file mode 100644 index 362fd1cfc..000000000 --- a/source/row_mmi.cc +++ /dev/null @@ -1,7842 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "libyuv/row.h" - -#include // For memcpy and memset. - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -// clang-format off - -void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - uint64_t src0, src1, dest; - const uint64_t mask = 0xff000000ULL; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask] \n\t" - "or %[src1], %[src1], %[mask] \n\t" - "punpcklwd %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask] \n\t" - "or %[src1], %[src1], %[mask] \n\t" - "punpcklwd %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width), - [mask] "f"(mask) - : "memory"); -} - -void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) { - uint64_t src0, src1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0xff000000ULL; - const uint64_t mask2 = 0xc6; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask1] \n\t" - "punpcklbh %[src0], %[src0], %[mask0] \n\t" - "pshufh %[src0], %[src0], %[mask2] \n\t" - "or %[src1], %[src1], %[mask1] \n\t" - "punpcklbh %[src1], %[src1], %[mask0] \n\t" - "pshufh %[src1], %[src1], %[mask2] \n\t" - "packushb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t" - - "or %[src0], %[src0], %[mask1] \n\t" - "punpcklbh %[src0], %[src0], %[mask0] \n\t" - "pshufh %[src0], %[src0], %[mask2] \n\t" - "or %[src1], %[src1], %[mask1] \n\t" - "punpcklbh %[src1], %[src1], %[mask0] \n\t" - "pshufh %[src1], %[src1], %[mask2] \n\t" - "packushb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width) - : "memory"); -} - -void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { - uint64_t src0, src1; - uint64_t ftmp[4]; - uint64_t mask0 = 0xc6; - uint64_t mask1 = 0x6c; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t" - "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t" - "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t" - - "punpcklbh %[ftmp0], %[src0], %[zero] \n\t" - "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t" - "punpckhbh %[ftmp1], %[src0], %[zero] \n\t" - "punpcklbh %[src1], %[src1], %[zero] \n\t" - "pextrh %[ftmp2], %[ftmp0], %[three] \n\t" - "pextrh %[ftmp3], %[ftmp1], %[one] \n\t" - "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t" - "pextrh %[ftmp3], %[ftmp1], %[two] \n\t" - "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "pshufh %[src1], %[src1], %[mask1] \n\t" - "pextrh %[ftmp2], %[src1], %[zero] \n\t" - "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t" - "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" - "packushb %[src1], %[src1], %[zero] \n\t" - - "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t" - "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t" - "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t" - "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t" - - "daddiu %[src_raw], %[src_raw], 0x0c \n\t" - "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]), - [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]) - : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width), - [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00), - [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03) - : "memory"); -} - -void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565, - uint8_t* dst_argb, - int width) { - uint64_t ftmp[5]; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0007000700070007; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psrlh %[r], %[src1], %[three] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[two] \n\t" - "psrlh %[src1], %[g], %[four] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "packushb %[b], %[b], %[r] \n\t" - "packushb %[g], %[g], %[c1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" - "punpckhhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" - "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t" - "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), - [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]) - : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), - [four] "f"(0x04) - : "memory"); -} - -void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555, - uint8_t* dst_argb, - int width) { - uint64_t ftmp[6]; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0003000300030003; - uint64_t c3 = 0x007c007c007c007c; - uint64_t c4 = 0x0001000100010001; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "and %[r], %[src1], %[c3] \n\t" - "psrlh %[r], %[r], %[two] \n\t" - "psrlh %[a], %[src1], %[seven] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[three] \n\t" - "psrlh %[src1], %[g], %[two] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "xor %[a], %[a], %[c1] \n\t" - "paddb %[a], %[a], %[c4] \n\t" - "packushb %[b], %[b], %[r] \n\t" - "packushb %[g], %[g], %[a] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" - "punpckhhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" - "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t" - "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), - [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5]) - : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05), - [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07) - : "memory"); -} - -void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444, - uint8_t* dst_argb, - int width) { - uint64_t ftmp[6]; - uint64_t c0 = 0x000f000f000f000f; - uint64_t c1 = 0x00ff00ff00ff00ff; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g], %[src0], %[four] \n\t" - "and %[r], %[src1], %[c0] \n\t" - "psrlh %[a], %[src1], %[four] \n\t" - "psllh %[src0], %[b], %[four] \n\t" - "or %[b], %[src0], %[b] \n\t" - "psllh %[src0], %[g], %[four] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psllh %[src0], %[r], %[four] \n\t" - "or %[r], %[src0], %[r] \n\t" - "psllh %[src0], %[a], %[four] \n\t" - "or %[a], %[src0], %[a] \n\t" - "packushb %[b], %[b], %[r] \n\t" - "packushb %[g], %[g], %[a] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t" - "punpckhhw %[r], %[src0], %[src1] \n\t" - "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t" - "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t" - "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]), - [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5]) - : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08), - [four] "f"(0x04) - : "memory"); -} - -void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - uint64_t src; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t" - - "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t" - - "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t" - - "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t" - "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t" - "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width) - : "memory"); -} - -void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - uint64_t src0, src1; - uint64_t ftmp[3]; - uint64_t mask0 = 0xc6; - uint64_t mask1 = 0x18; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[ftmp0], %[src0], %[zero] \n\t" - "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t" - "punpckhbh %[ftmp1], %[src0], %[zero] \n\t" - "punpcklbh %[ftmp2], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - - "pextrh %[src0], %[ftmp1], %[two] \n\t" - "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t" - "pshufh %[ftmp1], %[ftmp1], %[one] \n\t" - - "pextrh %[src0], %[ftmp2], %[two] \n\t" - "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t" - "pextrh %[src0], %[ftmp2], %[one] \n\t" - "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t" - "pextrh %[src0], %[ftmp2], %[zero] \n\t" - "pshufh %[src1], %[src1], %[mask1] \n\t" - "pinsrh_0 %[src1], %[src1], %[src0] \n\t" - "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" - "packushb %[src1], %[src1], %[zero] \n\t" - - "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t" - "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t" - "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]), - [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00), - [one] "f"(0x01), [two] "f"(0x02) - : "memory"); -} - -void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { - uint64_t src0, src1; - uint64_t ftmp[3]; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] \n\t" - "punpckhbh %[g], %[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - - "psrlh %[b], %[b], %[three] \n\t" - "psrlh %[g], %[g], %[two] \n\t" - "psrlh %[r], %[r], %[three] \n\t" - - "psllh %[g], %[g], %[five] \n\t" - "psllh %[r], %[r], %[eleven] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05), - [eleven] "f"(0x0b) - : "memory"); -} - -// dither4 is a row of 4 values from 4x4 dither matrix. -// The 4x4 matrix contains values to increase RGB. When converting to -// fewer bits (565) this provides an ordered dither. -// The order in the 4x4 matrix in first byte is upper left. -// The 4 values are passed as an int, then referenced as an array, so -// endian will not affect order of the original matrix. But the dither4 -// will containing the first pixel in the lower byte for little endian -// or the upper byte for big endian. -void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - const uint32_t dither4, - int width) { - uint64_t src0, src1; - uint64_t ftmp[3]; - uint64_t c0 = 0x00ff00ff00ff00ff; - - __asm__ volatile( - "punpcklbh %[dither], %[dither], %[zero] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] \n\t" - "punpckhbh %[g], %[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - - "paddh %[b], %[b], %[dither] \n\t" - "paddh %[g], %[g], %[dither] \n\t" - "paddh %[r], %[r], %[dither] \n\t" - "pcmpgth %[src0], %[b], %[c0] \n\t" - "or %[src0], %[src0], %[b] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "pcmpgth %[src0], %[g], %[c0] \n\t" - "or %[src0], %[src0], %[g] \n\t" - "and %[g], %[src0], %[c0] \n\t" - "pcmpgth %[src0], %[r], %[c0] \n\t" - "or %[src0], %[src0], %[r] \n\t" - "and %[r], %[src0], %[c0] \n\t" - - "psrlh %[b], %[b], %[three] \n\t" - "psrlh %[g], %[g], %[two] \n\t" - "psrlh %[r], %[r], %[three] \n\t" - - "psllh %[g], %[g], %[five] \n\t" - "psllh %[r], %[r], %[eleven] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02), - [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b) - : "memory"); -} - -void ARGBToARGB1555Row_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { - uint64_t src0, src1; - uint64_t ftmp[4]; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] \n\t" - "punpckhbh %[g], %[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - "punpckhbh %[a], %[src1], %[zero] \n\t" - - "psrlh %[b], %[b], %[three] \n\t" - "psrlh %[g], %[g], %[three] \n\t" - "psrlh %[r], %[r], %[three] \n\t" - "psrlh %[a], %[a], %[seven] \n\t" - - "psllh %[g], %[g], %[five] \n\t" - "psllh %[r], %[r], %[ten] \n\t" - "psllh %[a], %[a], %[fifteen] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - "or %[b], %[b], %[a] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05), - [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f) - : "memory"); -} - -void ARGBToARGB4444Row_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width) { - uint64_t src0, src1; - uint64_t ftmp[4]; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t" - - "punpcklbh %[b], %[src0], %[src1] \n\t" - "punpckhbh %[g], %[src0], %[src1] \n\t" - "punpcklbh %[src0], %[b], %[g] \n\t" - "punpckhbh %[src1], %[b], %[g] \n\t" - "punpcklbh %[b], %[src0], %[zero] \n\t" - "punpckhbh %[g], %[src0], %[zero] \n\t" - "punpcklbh %[r], %[src1], %[zero] \n\t" - "punpckhbh %[a], %[src1], %[zero] \n\t" - - "psrlh %[b], %[b], %[four] \n\t" - "psrlh %[g], %[g], %[four] \n\t" - "psrlh %[r], %[r], %[four] \n\t" - "psrlh %[a], %[a], %[four] \n\t" - - "psllh %[g], %[g], %[four] \n\t" - "psllh %[r], %[r], %[eight] \n\t" - "psllh %[a], %[a], %[twelve] \n\t" - "or %[b], %[b], %[g] \n\t" - "or %[b], %[b], %[r] \n\t" - "or %[b], %[b], %[a] \n\t" - - "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t" - "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x10 \n\t" - "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t" - "daddiu %[width], %[width], -0x04 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]), - [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3]) - : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width), - [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08), - [twelve] "f"(0x0c) - : "memory"); -} - -void ARGBToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0001004200810019; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void ARGBToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0013002500380002; - const uint64_t mask_v = 0x00020038002f0009; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest0_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0019008100420001; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void BGRAToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0002003800250013; - const uint64_t mask_v = 0x0009002f00380002; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest0_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src1], %[src0] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src0], %[src1] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src1], %[src0] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src0], %[src1] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src1], %[src0] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src0], %[src1] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsrl %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_0 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src1], %[src0] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src0], %[src1] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0001001900810042; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void ABGRToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0002003800250013; - const uint64_t mask_v = 0x0009002F00380002; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t" - "dsll %[dest0_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src1], %[src0] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src0], %[src1] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t" - "dsll %[dest1_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src1], %[src0] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src0], %[src1] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t" - "dsll %[dest2_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src1], %[src0] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src0], %[src1] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t" - "dsll %[dest3_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src1], %[src0] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src0], %[src1] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0042008100190001; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x08(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x17(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x10(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x18(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void RGBAToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0013002500380002; - const uint64_t mask_v = 0x00020038002f0009; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t" - "dsrl %[dest0_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t" - "dsrl %[dest1_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t" - "dsrl %[dest2_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t" - "dsrl %[dest3_v], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_0 %[src_lo], %[src0], %[value] \n\t" - "dsrl %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0001004200810019; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x06(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x13(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x19(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x12(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x18 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void RGB24ToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0013002500380002; - const uint64_t mask_v = 0x00020038002f0009; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest0_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest0, dest1, dest2, dest3; - const uint64_t value = 0x1080; - const uint64_t mask = 0x0001001900810042; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x00(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[dest0], %[src] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x06(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[dest1], %[src] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldlc1 %[src], 0x13(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[dest2], %[src] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "gsldlc1 %[src], 0x19(%[src_argb]) \n\t" - "gsldrc1 %[src], 0x12(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src], %[zero] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "dsll %[src], %[src], %[eight] \n\t" - "punpckhbh %[src_hi], %[src], %[zero] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[dest3], %[src] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x18 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3) - : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width), - [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08), - [zero] "f"(0x00) - : "memory"); -} - -void RAWToUVRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[13]; - uint64_t tmp[1]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0002003800250013; - const uint64_t mask_v = 0x0009002f00380002; - - __asm__ volatile( - "dli %[tmp0], 0x0001000100010001 \n\t" - "dmtc1 %[tmp0], %[ftmp12] \n\t" - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t" - "dsll %[dest0_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src1], %[src0] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src0], %[src1] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t" - "dsll %[dest1_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src1], %[src0] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src0], %[src1] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t" - "dsll %[dest2_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src1], %[src0] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src0], %[src1] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t" - "dsll %[dest3_v], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "dsll %[src0], %[src0], %[eight] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src_hi] \n\t" - "punpcklbh %[src_lo], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_lo] \n\t" - "dsll %[src1], %[src1], %[eight] \n\t" - "punpckhbh %[src_hi], %[src1], %[zero] \n\t" - "paddh %[src0], %[src0], %[src_hi] \n\t" - "paddh %[src0], %[src0], %[ftmp12] \n\t" - "psrlh %[src0], %[src0], %[one] \n\t" - "pinsrh_3 %[src_lo], %[src0], %[value] \n\t" - "dsll %[src_hi], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src1], %[src0] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src0], %[src1] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]), - [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01), - [sixteen] "f"(0x10) - : "memory"); -} - -void ARGBToYJRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) { - uint64_t src, src_hi, src_lo; - uint64_t dest, dest0, dest1, dest2, dest3; - uint64_t tmp0, tmp1; - const uint64_t shift = 0x08; - const uint64_t value = 0x80; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x0001004D0096001DULL; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - - "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - - "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest2], %[dest2], %[shift] \n\t" - - "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t" - "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest3], %[dest3], %[shift] \n\t" - - "packsswh %[tmp0], %[dest0], %[dest1] \n\t" - "packsswh %[tmp1], %[dest2], %[dest3] \n\t" - "packushb %[dest], %[tmp0], %[tmp1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), - [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0), - [tmp1] "=&f"(tmp1) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0), - [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value), - [width] "r"(width) - : "memory"); -} - -void ARGBToUVJRow_MMI(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src_rgb1; - uint64_t ftmp[12]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0015002a003f0002; - const uint64_t mask_v = 0x0002003f0035000a; - - __asm__ volatile( - "1: \n\t" - "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest0_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest1_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest2_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[dest3_u], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - - "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t" - "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t" - "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t" - "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "punpcklbh %[src0], %[src1], %[zero] \n\t" - "punpckhbh %[src1], %[src1], %[zero] \n\t" - "paddh %[src0], %[src_lo], %[src0] \n\t" - "paddh %[src1], %[src_hi], %[src1] \n\t" - "pavgh %[src0], %[src0], %[src1] \n\t" - "dsll %[src_lo], %[src0], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src0], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]), - [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]), - [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]), - [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]), - [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]) - : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value), - [zero] "f"(0x00), [eight] "f"(0x08), - [sixteen] "f"(0x10) - : "memory"); -} - -void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { - uint64_t ftmp[11]; - const uint64_t value = 0x1080108010801080; - const uint64_t mask = 0x0001004200810019; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0007000700070007; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psrlh %[r], %[src1], %[three] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[two] \n\t" - "psrlh %[src1], %[g], %[four] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[src0], %[src1] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[src0], %[src1] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psrlh %[r], %[src1], %[three] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[two] \n\t" - "psrlh %[src1], %[g], %[four] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[src0], %[src1] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[src0], %[src1] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddiu %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), - [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), - [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) - : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05), - [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04) - : "memory"); -} - -void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width) { - uint64_t ftmp[11]; - const uint64_t value = 0x1080108010801080; - const uint64_t mask = 0x0001004200810019; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0003000300030003; - uint64_t c3 = 0x007c007c007c007c; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "and %[r], %[src1], %[c3] \n\t" - "psrlh %[r], %[r], %[two] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[three] \n\t" - "psrlh %[src1], %[g], %[two] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[src0], %[src1] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[src0], %[src1] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g], %[src1], %[c2] \n\t" - "psllh %[g], %[g], %[three] \n\t" - "or %[g], %[src0], %[g] \n\t" - "and %[r], %[src1], %[c3] \n\t" - "psrlh %[r], %[r], %[two] \n\t" - "psllh %[src0], %[b], %[three] \n\t" - "psrlh %[src1], %[b], %[two] \n\t" - "or %[b], %[src0], %[src1] \n\t" - "psllh %[src0], %[g], %[three] \n\t" - "psrlh %[src1], %[g], %[two] \n\t" - "or %[g], %[src0], %[src1] \n\t" - "psllh %[src0], %[r], %[three] \n\t" - "psrlh %[src1], %[r], %[two] \n\t" - "or %[r], %[src0], %[src1] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[src0], %[src1] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[src0], %[src1] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddiu %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), - [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), - [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) - : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y), - [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0), - [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08), - [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07) - : "memory"); -} - -void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width) { - uint64_t ftmp[11]; - uint64_t value = 0x1080108010801080; - uint64_t mask = 0x0001004200810019; - uint64_t c0 = 0x000f000f000f000f; - uint64_t c1 = 0x00ff00ff00ff00ff; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g], %[src0], %[four] \n\t" - "and %[r], %[src1], %[c0] \n\t" - "psllh %[src0], %[b], %[four] \n\t" - "or %[b], %[src0], %[b] \n\t" - "psllh %[src0], %[g], %[four] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psllh %[src0], %[r], %[four] \n\t" - "or %[r], %[src0], %[r] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest0], %[src0], %[src1] \n\t" - "psrlw %[dest0], %[dest0], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest1], %[src0], %[src1] \n\t" - "psrlw %[dest1], %[dest1], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t" - "psrlh %[src1], %[src0], %[eight] \n\t" - "and %[b], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g], %[src0], %[four] \n\t" - "and %[r], %[src1], %[c0] \n\t" - "psllh %[src0], %[b], %[four] \n\t" - "or %[b], %[src0], %[b] \n\t" - "psllh %[src0], %[g], %[four] \n\t" - "or %[g], %[src0], %[g] \n\t" - "psllh %[src0], %[r], %[four] \n\t" - "or %[r], %[src0], %[r] \n\t" - "punpcklhw %[src0], %[b], %[r] \n\t" - "punpcklhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest2], %[src0], %[src1] \n\t" - "psrlw %[dest2], %[dest2], %[eight] \n\t" - - "punpckhhw %[src0], %[b], %[r] \n\t" - "punpckhhw %[src1], %[g], %[value] \n\t" - "punpcklhw %[src_lo], %[src0], %[src1] \n\t" - "punpckhhw %[src_hi], %[src0], %[src1] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t" - "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t" - "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t" - "paddw %[dest3], %[src0], %[src1] \n\t" - "psrlw %[dest3], %[dest3], %[eight] \n\t" - - "packsswh %[src_lo], %[dest0], %[dest1] \n\t" - "packsswh %[src_hi], %[dest2], %[dest3] \n\t" - "packushb %[dest0], %[src_lo], %[src_hi] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t" - - "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t" - "daddiu %[dst_y], %[dst_y], 0x08 \n\t" - "daddiu %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]), - [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]), - [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10]) - : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y), - [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0), - [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04) - : "memory"); -} - -void RGB565ToUVRow_MMI(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[13]; - uint64_t value = 0x2020202020202020; - uint64_t mask_u = 0x0026004a00700002; - uint64_t mask_v = 0x00020070005e0012; - uint64_t mask = 0x93; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0007000700070007; - __asm__ volatile( - "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t" - "psrlh %[dest0_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest0_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest0_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest0_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest0_v], %[src0], %[c2] \n\t" - "psllh %[dest0_v], %[dest0_v], %[three] \n\t" - "or %[dest0_v], %[src1], %[dest0_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest0_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest0_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t" - "psrlh %[dest1_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest1_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest1_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest1_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest1_v], %[src0], %[c2] \n\t" - "psllh %[dest1_v], %[dest1_v], %[three] \n\t" - "or %[dest1_v], %[src1], %[dest1_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest1_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest1_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t" - "psrlh %[dest2_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest2_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest2_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest2_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest2_v], %[src0], %[c2] \n\t" - "psllh %[dest2_v], %[dest2_v], %[three] \n\t" - "or %[dest2_v], %[src1], %[dest2_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest2_u] \n\t" - "paddh %[g0], %[g0], %[dest2_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest2_u], %[dest2_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest2_u], %[dest2_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest2_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest2_v], %[g0] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t" - "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t" - "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t" - "psrlh %[dest3_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest3_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "psrlh %[r0], %[dest3_u], %[three] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest3_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest3_v], %[src0], %[c2] \n\t" - "psllh %[dest3_v], %[dest3_v], %[three] \n\t" - "or %[dest3_v], %[src1], %[dest3_v] \n\t" - "psrlh %[src0], %[src0], %[three] \n\t" - "paddh %[b0], %[b0], %[dest3_u] \n\t" - "paddh %[g0], %[g0], %[dest3_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest3_u], %[dest3_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest3_u], %[dest3_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest3_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest3_v], %[g0] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t" - "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddiu %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), - [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), - [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), - [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]), - [dest3_v] "=&f"(ftmp[12]) - : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), - [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), - [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03), - [one] "f"(0x01) - : "memory"); -} - -void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[11]; - uint64_t value = 0x2020202020202020; - uint64_t mask_u = 0x0026004a00700002; - uint64_t mask_v = 0x00020070005e0012; - uint64_t mask = 0x93; - uint64_t c0 = 0x001f001f001f001f; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t c2 = 0x0003000300030003; - uint64_t c3 = 0x007c007c007c007c; - __asm__ volatile( - "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t" - "psrlh %[dest0_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest0_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest0_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest0_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest0_v], %[src0], %[c2] \n\t" - "psllh %[dest0_v], %[dest0_v], %[three] \n\t" - "or %[dest0_v], %[src1], %[dest0_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest0_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest0_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t" - "psrlh %[dest1_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest1_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest1_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest1_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest1_v], %[src0], %[c2] \n\t" - "psllh %[dest1_v], %[dest1_v], %[three] \n\t" - "or %[dest1_v], %[src1], %[dest1_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest1_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest1_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t" - "psrlh %[dest2_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest2_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest2_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest2_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest0_v], %[src0], %[c2] \n\t" - "psllh %[dest0_v], %[dest0_v], %[three] \n\t" - "or %[dest0_v], %[src1], %[dest0_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest2_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest2_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest2_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest2_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t" - "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t" - "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t" - "psrlh %[dest3_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[src0], %[src0], %[five] \n\t" - "and %[g0], %[dest3_u], %[c2] \n\t" - "psllh %[g0], %[g0], %[three] \n\t" - "or %[g0], %[src0], %[g0] \n\t" - "and %[r0], %[dest3_u], %[c3] \n\t" - "psrlh %[r0], %[r0], %[two] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest3_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[src1], %[src1], %[five] \n\t" - "and %[dest1_v], %[src0], %[c2] \n\t" - "psllh %[dest1_v], %[dest1_v], %[three] \n\t" - "or %[dest1_v], %[src1], %[dest1_v] \n\t" - "and %[src0], %[src0], %[c3] \n\t" - "psrlh %[src0], %[src0], %[two] \n\t" - "paddh %[b0], %[b0], %[dest3_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest3_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[six] \n\t" - "psllh %[r0], %[src0], %[one] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[six] \n\t" - "psllh %[g0], %[g0], %[one] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest3_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest3_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[dest0_u], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t" - "packushb %[dest0_v], %[dest1_u], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t" - "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddiu %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), - [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), - [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), - [dest1_v] "=&f"(ftmp[10]) - : [src_argb1555] "r"(src_argb1555), - [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u), - [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value), - [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), - [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), - [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03), - [two] "f"(0x02), [one] "f"(0x01) - : "memory"); -} - -void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[13]; - uint64_t value = 0x2020202020202020; - uint64_t mask_u = 0x0026004a00700002; - uint64_t mask_v = 0x00020070005e0012; - uint64_t mask = 0x93; - uint64_t c0 = 0x000f000f000f000f; - uint64_t c1 = 0x00ff00ff00ff00ff; - __asm__ volatile( - "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t" - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t" - "psrlh %[dest0_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest0_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest0_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest0_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest0_u] \n\t" - "paddh %[g0], %[g0], %[dest0_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest0_u], %[dest0_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest0_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest0_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest0_u], %[dest0_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest0_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest0_u], %[b0] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest0_v], %[g0] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t" - "psrlh %[dest1_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest1_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest1_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest1_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest1_u] \n\t" - "paddh %[g0], %[g0], %[dest1_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest1_u], %[dest1_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest1_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest1_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest1_u], %[dest1_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest1_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest1_u], %[b0] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest1_v], %[g0] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t" - "psrlh %[dest2_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest2_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest2_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest2_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest2_u] \n\t" - "paddh %[g0], %[g0], %[dest2_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest2_u], %[dest2_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest2_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest2_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest2_u], %[dest2_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest2_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest2_u], %[b0] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest2_v], %[g0] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t" - "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t" - "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t" - "psrlh %[dest3_u], %[src0], %[eight] \n\t" - "and %[b0], %[src0], %[c0] \n\t" - "and %[src0], %[src0], %[c1] \n\t" - "psrlh %[g0], %[src0], %[four] \n\t" - "and %[r0], %[dest3_u], %[c0] \n\t" - "psrlh %[src0], %[src1], %[eight] \n\t" - "and %[dest3_u], %[src1], %[c0] \n\t" - "and %[src1], %[src1], %[c1] \n\t" - "psrlh %[dest3_v], %[src1], %[four] \n\t" - "and %[src0], %[src0], %[c0] \n\t" - "paddh %[b0], %[b0], %[dest3_u] \n\t" - "paddh %[g0], %[g0], %[dest3_v] \n\t" - "paddh %[r0], %[r0], %[src0] \n\t" - "punpcklhw %[src0], %[b0], %[r0] \n\t" - "punpckhhw %[src1], %[b0], %[r0] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[src0], %[dest3_u], %[dest3_v] \n\t" - "psrlh %[b0], %[src0], %[four] \n\t" - "psllh %[r0], %[src0], %[two] \n\t" - "or %[b0], %[b0], %[r0] \n\t" - "psrlh %[r0], %[g0], %[four] \n\t" - "psllh %[g0], %[g0], %[two] \n\t" - "or %[g0], %[g0], %[r0] \n\t" - "punpcklhw %[src0], %[g0], %[value] \n\t" - "punpckhhw %[src1], %[g0], %[value] \n\t" - "punpcklwd %[dest3_u], %[src0], %[src1] \n\t" - "punpckhwd %[dest3_v], %[src0], %[src1] \n\t" - "paddh %[g0], %[dest3_u], %[dest3_v] \n\t" - "punpcklhw %[src0], %[b0], %[g0] \n\t" - "punpckhhw %[src1], %[b0], %[g0] \n\t" - - "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t" - "pshufh %[dest3_u], %[src0], %[mask] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[g0], %[src1], %[mask_v] \n\t" - "pshufh %[b0], %[src1], %[mask] \n\t" - "pmaddhw %[b0], %[b0], %[mask_u] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[b0] \n\t" - "punpckhwd %[src1], %[dest3_u], %[b0] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[g0] \n\t" - "punpckhwd %[src1], %[dest3_v], %[g0] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t" - "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddiu %[width], %[width], -0x10 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]), - [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]), - [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]), - [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]), - [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]), - [dest3_v] "=&f"(ftmp[12]) - : [src_argb4444] "r"(src_argb4444), - [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u), - [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value), - [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u), - [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04), - [two] "f"(0x02) - : "memory"); -} - -void ARGBToUV444Row_MMI(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t ftmp[12]; - const uint64_t value = 0x4040; - const uint64_t mask_u = 0x0026004a00700002; - const uint64_t mask_v = 0x00020070005e0012; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t" - "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t" - "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t" - - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t" - "psubw %[dest0_u], %[src0], %[src1] \n\t" - "psraw %[dest0_u], %[dest0_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t" - "psubw %[dest0_v], %[src1], %[src0] \n\t" - "psraw %[dest0_v], %[dest0_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t" - "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t" - "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t" - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t" - "psubw %[dest1_u], %[src0], %[src1] \n\t" - "psraw %[dest1_u], %[dest1_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t" - "psubw %[dest1_v], %[src1], %[src0] \n\t" - "psraw %[dest1_v], %[dest1_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t" - "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t" - "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t" - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t" - "psubw %[dest2_u], %[src0], %[src1] \n\t" - "psraw %[dest2_u], %[dest2_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t" - "psubw %[dest2_v], %[src1], %[src0] \n\t" - "psraw %[dest2_v], %[dest2_v], %[eight] \n\t" - - "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t" - "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t" - "punpcklbh %[src_lo], %[src0], %[zero] \n\t" - "punpckhbh %[src_hi], %[src0], %[zero] \n\t" - "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t" - "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t" - "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t" - "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t" - "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t" - "dsll %[src_lo], %[src_hi], %[sixteen] \n\t" - "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t" - "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t" - "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t" - - "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t" - "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t" - "psubw %[dest3_u], %[src0], %[src1] \n\t" - "psraw %[dest3_u], %[dest3_u], %[eight] \n\t" - "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t" - "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t" - "psubw %[dest3_v], %[src1], %[src0] \n\t" - "psraw %[dest3_v], %[dest3_v], %[eight] \n\t" - - "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t" - "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t" - "packushb %[dest0_u], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t" - "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t" - - "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t" - "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t" - "packushb %[dest0_v], %[src0], %[src1] \n\t" - "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t" - "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t" - - "daddiu %[src_argb], %[src_argb], 0x20 \n\t" - "daddiu %[dst_u], %[dst_u], 0x08 \n\t" - "daddiu %[dst_v], %[dst_v], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bgtz %[width], 1b \n\t" - : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), - [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]), - [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]), - [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]), - [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]), - [dest3_v] "=&f"(ftmp[11]) - : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), - [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10), - [eight] "f"(0x08) - : "memory"); -} - -void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) { - uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi; - uint64_t tmp0, tmp1; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x01; - const uint64_t mask2 = 0x0080004D0096001DULL; - const uint64_t mask3 = 0xFF000000FF000000ULL; - const uint64_t mask4 = ~mask3; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - - "and %[src37], %[src], %[mask3] \n\t" - - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t" - "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t" - "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t" - "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t" - "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t" - "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t" - - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t" - "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t" - "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t" - "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t" - "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t" - "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "and %[dest], %[dest], %[mask4] \n\t" - "or %[dest], %[dest], %[src37] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0), - [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest), - [src37] "=&f"(src37) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width), - [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4) - : "memory"); -} - -// Convert a row of image to Sepia tone. -void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) { - uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2; - uint64_t tmp0, tmp1; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x002300440011ULL; - const uint64_t mask2 = 0x002D00580016ULL; - const uint64_t mask3 = 0x003200620018ULL; - const uint64_t mask4 = 0xFF000000FF000000ULL; - const uint64_t shift = 0x07; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "and %[dest37], %[dest], %[mask4] \n\t" - - "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t" - "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t" - "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t" - "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t" - "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t" - "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t" - "paddw %[dest0], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t" - "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t" - "paddw %[dest1], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" - - "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t" - "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t" - "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t" - "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t" - "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t" - "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t" - "paddw %[dest0], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t" - "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t" - "paddw %[dest1], %[tmp0], %[tmp1] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - "packsswh %[dest_hi], %[dest0], %[dest1] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "or %[dest], %[dest], %[dest37] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), - [dest] "=&f"(dest) - : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3), - [mask4] "f"(mask4), [shift] "f"(shift) - : "memory"); -} - -// Apply color matrix to a row of image. Matrix is signed. -// TODO(fbarchard): Consider adding rounding (+32). -void ARGBColorMatrixRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - const int8_t* matrix_argb, - int width) { - uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2, - dest3; - uint64_t matrix, matrix_hi, matrix_lo; - uint64_t tmp0, tmp1; - const uint64_t shift0 = 0x06; - const uint64_t shift1 = 0x08; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - - "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t" - "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t" - "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t" - "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t" - "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t" - "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t" - "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t" - "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t" - "paddw %[dest0], %[tmp0], %[tmp1] \n\t" - "psraw %[dest0], %[dest0], %[shift0] \n\t" - - "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t" - "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t" - "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t" - "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t" - "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t" - "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t" - "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t" - "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t" - "paddw %[dest1], %[tmp0], %[tmp1] \n\t" - "psraw %[dest1], %[dest1], %[shift0] \n\t" - - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - - "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t" - "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t" - "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t" - "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t" - "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t" - "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t" - "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t" - "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t" - "paddw %[dest2], %[tmp0], %[tmp1] \n\t" - "psraw %[dest2], %[dest2], %[shift0] \n\t" - - "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t" - "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t" - "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t" - "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t" - "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t" - "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t" - "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t" - "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t" - "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t" - "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t" - "paddw %[dest3], %[tmp0], %[tmp1] \n\t" - "psraw %[dest3], %[dest3], %[shift0] \n\t" - - "packsswh %[tmp0], %[dest0], %[dest1] \n\t" - "packsswh %[tmp1], %[dest2], %[dest3] \n\t" - "packushb %[dest], %[tmp0], %[tmp1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest), - [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi), - [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix) - : [src_ptr] "r"(src_argb), [matrix_ptr] "r"(matrix_argb), - [dst_ptr] "r"(dst_argb), [width] "r"(width), [shift0] "f"(shift0), - [shift1] "f"(shift1), [mask0] "f"(mask0), [mask1] "f"(mask1) - : "memory"); -} - -void ARGBShadeRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - int width, - uint32_t value) { - uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[src] \n\t" - "punpckhbh %[src_hi], %[src], %[src] \n\t" - - "punpcklbh %[value], %[value], %[value] \n\t" - - "pmulhuh %[dest_lo], %[src_lo], %[value] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - "pmulhuh %[dest_hi], %[src_hi], %[value] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src), - [dest] "=&f"(dest) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width), - [value] "f"(value), [shift] "f"(shift) - : "memory"); -} - -void ARGBMultiplyRow_MMI(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo; - uint64_t dest, dest_lo, dest_hi; - const uint64_t mask = 0x0; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[src0_lo], %[src0], %[src0] \n\t" - "punpckhbh %[src0_hi], %[src0], %[src0] \n\t" - - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src1_lo], %[src1], %[mask] \n\t" - "punpckhbh %[src1_hi], %[src1], %[mask] \n\t" - - "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t" - "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), - [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0), - [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1), - [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask) - : "memory"); -} - -void ARGBAddRow_MMI(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - uint64_t src0, src1, dest; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "paddusb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1), - [dst_ptr] "r"(dst_argb), [width] "r"(width) - : "memory"); -} - -void ARGBSubtractRow_MMI(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - uint64_t src0, src1, dest; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "psubusb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1), - [dst_ptr] "r"(dst_argb), [width] "r"(width) - : "memory"); -} - -// Sobel functions which mimics SSSE3. -void SobelXRow_MMI(const uint8_t* src_y0, - const uint8_t* src_y1, - const uint8_t* src_y2, - uint8_t* dst_sobelx, - int width) { - uint64_t y00 = 0, y10 = 0, y20 = 0; - uint64_t y02 = 0, y12 = 0, y22 = 0; - uint64_t zero = 0x0; - uint64_t sobel = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i] - "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t" - "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // a_sub=src_y0[i+2] - "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t" - - "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // b=src_y1[i] - "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t" - "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // b_sub=src_y1[i+2] - "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t" - - "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t" // c=src_y2[i] - "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t" - "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t" // c_sub=src_y2[i+2] - "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t" - - "punpcklbh %[y00], %[y00], %[zero] \n\t" - "punpcklbh %[y10], %[y10], %[zero] \n\t" - "punpcklbh %[y20], %[y20], %[zero] \n\t" - - "punpcklbh %[y02], %[y02], %[zero] \n\t" - "punpcklbh %[y12], %[y12], %[zero] \n\t" - "punpcklbh %[y22], %[y22], %[zero] \n\t" - - "paddh %[y00], %[y00], %[y10] \n\t" // a+b - "paddh %[y20], %[y20], %[y10] \n\t" // c+b - "paddh %[y00], %[y00], %[y20] \n\t" // a+2b+c - - "paddh %[y02], %[y02], %[y12] \n\t" // a_sub+b_sub - "paddh %[y22], %[y22], %[y12] \n\t" // c_sub+b_sub - "paddh %[y02], %[y02], %[y22] \n\t" // a_sub+2b_sub+c_sub - - "pmaxsh %[y10], %[y00], %[y02] \n\t" - "pminsh %[y20], %[y00], %[y02] \n\t" - "psubh %[sobel], %[y10], %[y20] \n\t" // Abs - - "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t" - "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t" - "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t" - "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t" - - "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t" - "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t" - "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t" - "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t" - - "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t" - "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t" - "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t" - "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t" - - "punpcklbh %[y00], %[y00], %[zero] \n\t" - "punpcklbh %[y10], %[y10], %[zero] \n\t" - "punpcklbh %[y20], %[y20], %[zero] \n\t" - - "punpcklbh %[y02], %[y02], %[zero] \n\t" - "punpcklbh %[y12], %[y12], %[zero] \n\t" - "punpcklbh %[y22], %[y22], %[zero] \n\t" - - "paddh %[y00], %[y00], %[y10] \n\t" - "paddh %[y20], %[y20], %[y10] \n\t" - "paddh %[y00], %[y00], %[y20] \n\t" - - "paddh %[y02], %[y02], %[y12] \n\t" - "paddh %[y22], %[y22], %[y12] \n\t" - "paddh %[y02], %[y02], %[y22] \n\t" - - "pmaxsh %[y10], %[y00], %[y02] \n\t" - "pminsh %[y20], %[y00], %[y02] \n\t" - "psubh %[y00], %[y10], %[y20] \n\t" - - "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255 - "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t" - "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t" - - "daddiu %[src_y0], %[src_y0], 8 \n\t" - "daddiu %[src_y1], %[src_y1], 8 \n\t" - "daddiu %[src_y2], %[src_y2], 8 \n\t" - "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10), - [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22) - : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2), - [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero) - : "memory"); -} - -void SobelYRow_MMI(const uint8_t* src_y0, - const uint8_t* src_y1, - uint8_t* dst_sobely, - int width) { - uint64_t y00 = 0, y01 = 0, y02 = 0; - uint64_t y10 = 0, y11 = 0, y12 = 0; - uint64_t zero = 0x0; - uint64_t sobel = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i] - "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t" - "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t" // b=src_y0[i+1] - "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t" - "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // c=src_y0[i+2] - "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t" - - "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // a_sub=src_y1[i] - "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t" - "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t" // b_sub=src_y1[i+1] - "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t" - "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // c_sub=src_y1[i+2] - "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t" - - "punpcklbh %[y00], %[y00], %[zero] \n\t" - "punpcklbh %[y01], %[y01], %[zero] \n\t" - "punpcklbh %[y02], %[y02], %[zero] \n\t" - - "punpcklbh %[y10], %[y10], %[zero] \n\t" - "punpcklbh %[y11], %[y11], %[zero] \n\t" - "punpcklbh %[y12], %[y12], %[zero] \n\t" - - "paddh %[y00], %[y00], %[y01] \n\t" // a+b - "paddh %[y02], %[y02], %[y01] \n\t" // c+b - "paddh %[y00], %[y00], %[y02] \n\t" // a+2b+c - - "paddh %[y10], %[y10], %[y11] \n\t" // a_sub+b_sub - "paddh %[y12], %[y12], %[y11] \n\t" // c_sub+b_sub - "paddh %[y10], %[y10], %[y12] \n\t" // a_sub+2b_sub+c_sub - - "pmaxsh %[y02], %[y00], %[y10] \n\t" - "pminsh %[y12], %[y00], %[y10] \n\t" - "psubh %[sobel], %[y02], %[y12] \n\t" // Abs - - "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t" - "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t" - "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t" - "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t" - "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t" - "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t" - - "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t" - "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t" - "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t" - "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t" - "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t" - "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t" - - "punpcklbh %[y00], %[y00], %[zero] \n\t" - "punpcklbh %[y01], %[y01], %[zero] \n\t" - "punpcklbh %[y02], %[y02], %[zero] \n\t" - - "punpcklbh %[y10], %[y10], %[zero] \n\t" - "punpcklbh %[y11], %[y11], %[zero] \n\t" - "punpcklbh %[y12], %[y12], %[zero] \n\t" - - "paddh %[y00], %[y00], %[y01] \n\t" - "paddh %[y02], %[y02], %[y01] \n\t" - "paddh %[y00], %[y00], %[y02] \n\t" - - "paddh %[y10], %[y10], %[y11] \n\t" - "paddh %[y12], %[y12], %[y11] \n\t" - "paddh %[y10], %[y10], %[y12] \n\t" - - "pmaxsh %[y02], %[y00], %[y10] \n\t" - "pminsh %[y12], %[y00], %[y10] \n\t" - "psubh %[y00], %[y02], %[y12] \n\t" - - "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255 - "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t" - "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t" - - "daddiu %[src_y0], %[src_y0], 8 \n\t" - "daddiu %[src_y1], %[src_y1], 8 \n\t" - "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01), - [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12) - : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), - [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero) - : "memory"); -} - -void SobelRow_MMI(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_argb, - int width) { - double temp[3]; - uint64_t c1 = 0xff000000ff000000; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t" // a=src_sobelx[i] - "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t" - "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i] - "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t" - // s7 s6 s5 s4 s3 s2 s1 s0 = a+b - "paddusb %[t2] , %[t0], %[t1] \n\t" - - // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0 - "punpcklbh %[t0], %[t2], %[t2] \n\t" - - // s1 s1 s0 s0->s1 s2 s1 s1 s0 s0 s0 s0 - "punpcklbh %[t1], %[t0], %[t0] \n\t" - "or %[t1], %[t1], %[c1] \n\t" - // 255 s1 s1 s1 s55 s0 s0 s0 - "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t" - - // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2 - "punpckhbh %[t1], %[t0], %[t0] \n\t" - "or %[t1], %[t1], %[c1] \n\t" - // 255 s3 s3 s3 255 s2 s2 s2 - "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t" - - // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4 - "punpckhbh %[t0], %[t2], %[t2] \n\t" - - // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4 - "punpcklbh %[t1], %[t0], %[t0] \n\t" - "or %[t1], %[t1], %[c1] \n\t" - "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t" - "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t" - - // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6 - "punpckhbh %[t1], %[t0], %[t0] \n\t" - "or %[t1], %[t1], %[c1] \n\t" - "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t" - "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t" - - "daddiu %[dst_argb], %[dst_argb], 32 \n\t" - "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t" - "daddiu %[src_sobely], %[src_sobely], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]) - : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely), - [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1) - : "memory"); -} - -void SobelToPlaneRow_MMI(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_y, - int width) { - uint64_t tr = 0; - uint64_t tb = 0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t" - "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i] - "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t" - "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i] - "paddusb %[tr], %[tr], %[tb] \n\t" // g - "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t" - "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t" - - "daddiu %[dst_y], %[dst_y], 8 \n\t" - "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t" - "daddiu %[src_sobely], %[src_sobely], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [tr] "=&f"(tr), [tb] "=&f"(tb) - : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely), - [dst_y] "r"(dst_y), [width] "r"(width) - : "memory"); -} - -void SobelXYRow_MMI(const uint8_t* src_sobelx, - const uint8_t* src_sobely, - uint8_t* dst_argb, - int width) { - uint64_t temp[3]; - uint64_t result = 0; - uint64_t gb = 0; - uint64_t cr = 0; - uint64_t c1 = 0xffffffffffffffff; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t" // r=src_sobelx[i] - "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t" - "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i] - "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t" - "paddusb %[tg] , %[tr], %[tb] \n\t" // g - - // g3 b3 g2 b2 g1 b1 g0 b0 - "punpcklbh %[gb], %[tb], %[tg] \n\t" - // c3 r3 r2 r2 c1 r1 c0 r0 - "punpcklbh %[cr], %[tr], %[c1] \n\t" - // c1 r1 g1 b1 c0 r0 g0 b0 - "punpcklhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t" - // c3 r3 g3 b3 c2 r2 g2 b2 - "punpckhhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t" - - // g7 b7 g6 b6 g5 b5 g4 b4 - "punpckhbh %[gb], %[tb], %[tg] \n\t" - // c7 r7 c6 r6 c5 r5 c4 r4 - "punpckhbh %[cr], %[tr], %[c1] \n\t" - // c5 r5 g5 b5 c4 r4 g4 b4 - "punpcklhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t" - // c7 r7 g7 b7 c6 r6 g6 b6 - "punpckhhw %[result], %[gb], %[cr] \n\t" - "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t" - "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t" - - "daddiu %[dst_argb], %[dst_argb], 32 \n\t" - "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t" - "daddiu %[src_sobely], %[src_sobely], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]), - [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result) - : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely), - [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1) - : "memory"); -} - -void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) { - // Copy a Y to RGB. - uint64_t src, dest; - const uint64_t mask0 = 0x00ffffff00ffffffULL; - const uint64_t mask1 = ~mask0; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src], %[src], %[src] \n\t" - "punpcklhw %[dest], %[src], %[src] \n\t" - "and %[dest], %[dest], %[mask0] \n\t" - "or %[dest], %[dest], %[mask1] \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - - "punpckhhw %[dest], %[src], %[src] \n\t" - "and %[dest], %[dest], %[mask0] \n\t" - "or %[dest], %[dest], %[mask1] \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest) - : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -// TODO - respect YuvConstants -void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, - const struct YuvConstants*, int width) { - uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x55; - const uint64_t mask2 = 0xAA; - const uint64_t mask3 = 0xFF; - const uint64_t mask4 = 0x4A354A354A354A35ULL; - const uint64_t mask5 = 0x0488048804880488ULL; - const uint64_t shift0 = 0x08; - const uint64_t shift1 = 0x06; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" - - "pshufh %[src], %[src_lo], %[mask0] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_lo], %[mask1] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "pshufh %[src], %[src_lo], %[mask2] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_lo], %[mask3] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "pshufh %[src], %[src_hi], %[mask0] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_hi], %[mask1] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t" - - "pshufh %[src], %[src_hi], %[mask2] \n\t" - "psllh %[dest_lo], %[src], %[shift0] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src] \n\t" - "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t" - "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t" - "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t" - "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t" - "pshufh %[src], %[src_hi], %[mask3] \n\t" - "psllh %[dest_hi], %[src], %[shift0] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src] \n\t" - "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t" - "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t" - "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t" - "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t" - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t" - - "daddi %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo) - : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3), - [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0), - [shift1] "f"(shift1), [width] "r"(width) - : "memory"); -} - -void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - uint64_t source, src0, src1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x1b; - - src += width - 1; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[source], 0(%[src_ptr]) \n\t" - "gsldrc1 %[source], -7(%[src_ptr]) \n\t" - "punpcklbh %[src0], %[source], %[mask0] \n\t" - "pshufh %[src0], %[src0], %[mask1] \n\t" - "punpckhbh %[src1], %[source], %[mask0] \n\t" - "pshufh %[src1], %[src1], %[mask1] \n\t" - "packushb %[dest], %[src1], %[src0] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddi %[src_ptr], %[src_ptr], -0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0), - [src1] "=&f"(src1) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void MirrorSplitUVRow_MMI(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t src0, src1, dest0, dest1; - const uint64_t mask0 = 0x00ff00ff00ff00ffULL; - const uint64_t mask1 = 0x1b; - const uint64_t shift = 0x08; - - src_uv += (width - 1) << 1; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 1(%[src_ptr]) \n\t" - "gsldrc1 %[src0], -6(%[src_ptr]) \n\t" - "gsldlc1 %[src1], -7(%[src_ptr]) \n\t" - "gsldrc1 %[src1], -14(%[src_ptr]) \n\t" - - "and %[dest0], %[src0], %[mask0] \n\t" - "pshufh %[dest0], %[dest0], %[mask1] \n\t" - "and %[dest1], %[src1], %[mask0] \n\t" - "pshufh %[dest1], %[dest1], %[mask1] \n\t" - "packushb %[dest0], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t" - - "psrlh %[dest0], %[src0], %[shift] \n\t" - "pshufh %[dest0], %[dest0], %[mask1] \n\t" - "psrlh %[dest1], %[src1], %[shift] \n\t" - "pshufh %[dest1], %[dest1], %[mask1] \n\t" - "packushb %[dest0], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t" - - "daddi %[src_ptr], %[src_ptr], -0x10 \n\t" - "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t" - "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0), - [src1] "=&f"(src1) - : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v), - [width] "r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1), - [shift] "f"(shift) - : "memory"); -} - -void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - src += (width - 1) * 4; - uint64_t temp = 0x0; - uint64_t shuff = 0x4e; // 01 00 11 10 - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[temp], 3(%[src]) \n\t" - "gsldrc1 %[temp], -4(%[src]) \n\t" - "pshufh %[temp], %[temp], %[shuff] \n\t" - "gssdrc1 %[temp], 0x0(%[dst]) \n\t" - "gssdlc1 %[temp], 0x7(%[dst]) \n\t" - - "daddiu %[src], %[src], -0x08 \n\t" - "daddiu %[dst], %[dst], 0x08 \n\t" - "daddiu %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [temp] "=&f"(temp) - : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff) - : "memory"); -} - -void SplitUVRow_MMI(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[4]; - uint64_t shift = 0x08; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t" - - "and %[t2], %[t0], %[c0] \n\t" - "and %[t3], %[t1], %[c0] \n\t" - "packushb %[t2], %[t2], %[t3] \n\t" - "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t" - - "psrlh %[t2], %[t0], %[shift] \n\t" - "psrlh %[t3], %[t1], %[shift] \n\t" - "packushb %[t2], %[t2], %[t3] \n\t" - "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t" - - "daddiu %[src_uv], %[src_uv], 16 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), - [t3] "=&f"(temp[3]) - : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -void MergeUVRow_MMI(const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uv, - int width) { - uint64_t temp[3]; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x0(%[src_u]) \n\t" - "gsldlc1 %[t0], 0x7(%[src_u]) \n\t" - "gsldrc1 %[t1], 0x0(%[src_v]) \n\t" - "gsldlc1 %[t1], 0x7(%[src_v]) \n\t" - "punpcklbh %[t2], %[t0], %[t1] \n\t" - "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t" - "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t" - "punpckhbh %[t2], %[t0], %[t1] \n\t" - "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t" - "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t" - - "daddiu %[src_u], %[src_u], 8 \n\t" - "daddiu %[src_v], %[src_v], 8 \n\t" - "daddiu %[dst_uv], %[dst_uv], 16 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]) - : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v), - [width] "r"(width) - : "memory"); -} - -void SplitRGBRow_MMI(const uint8_t* src_rgb, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - int width) { - uint64_t src[4]; - uint64_t dest_hi, dest_lo, dest; - - __asm__ volatile( - "1: \n\t" - "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t" - "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t" - "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t" - "punpcklbh %[dest_lo], %[src0], %[src1] \n\t" - "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t" - "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t" - "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t" - "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t" - "punpcklbh %[dest_hi], %[src2], %[src3] \n\t" - - "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t" - "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t" - "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t" - "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t" - "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t" - "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t" - "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]), - [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g), - [dstb_ptr] "r"(dst_b), [width] "r"(width) - : "memory"); -} - -void MergeRGBRow_MMI(const uint8_t* src_r, - const uint8_t* src_g, - const uint8_t* src_b, - uint8_t* dst_rgb, - int width) { - uint64_t srcr, srcg, srcb, dest; - uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo; - const uint64_t temp = 0x0; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t" - "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t" - "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t" - "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t" - "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t" - "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t" - - "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t" - "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t" - "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t" - "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t" - - "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t" - "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t" - "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t" - "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t" - "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t" - "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t" - "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t" - "punpckhwd %[dest], %[dest], %[dest] \n\t" - "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t" - "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t" - - "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t" - "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t" - "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb), - [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi), - [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi), - [srcbz_lo] "=&f"(srcbz_lo) - : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b), - [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp) - : "memory"); -} - -// Filter 2 rows of YUY2 UV's (422) into U and V (420). -void YUY2ToUVRow_MMI(const uint8_t* src_yuy2, - int src_stride_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t c0 = 0xff00ff00ff00ff00; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t temp[3]; - uint64_t data[4]; - uint64_t shift = 0x08; - uint64_t src_stride = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" - "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t" - "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t" - "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c1] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t" - "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c1] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), - [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), - [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride) - : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift) - : "memory"); -} - -// Copy row of YUY2 UV's (422) into U and V (422). -void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - uint64_t c0 = 0xff00ff00ff00ff00; - uint64_t c1 = 0x00ff00ff00ff00ff; - uint64_t temp[2]; - uint64_t data[4]; - uint64_t shift = 0x08; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c1] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "psrlh %[t0], %[t0], %[shift] \n\t" - "psrlh %[t1], %[t1], %[shift] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c1] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]), - [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) - : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift) - : "memory"); -} - -// Copy row of YUY2 Y's (422) into Y (420/422). -void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[2]; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t" - "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t" - "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t" - "daddiu %[dst_y], %[dst_y], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]) - : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width), - [c0] "f"(c0) - : "memory"); -} - -// Filter 2 rows of UYVY UV's (422) into U and V (420). -void UYVYToUVRow_MMI(const uint8_t* src_uyvy, - int src_stride_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - // Output a row of UV values. - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[3]; - uint64_t data[4]; - uint64_t shift = 0x08; - uint64_t src_stride = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" - "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t" - "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t" - "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c0] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t" - "pavgb %[t0], %[t0], %[t1] \n\t" - - "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t" - "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t" - "pavgb %[t1], %[t2], %[t1] \n\t" - - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c0] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]), - [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), - [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride) - : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy), - [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width), - [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -// Copy row of UYVY UV's (422) into U and V (422). -void UYVYToUV422Row_MMI(const uint8_t* src_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - // Output a row of UV values. - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t temp[2]; - uint64_t data[4]; - uint64_t shift = 0x08; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d0], %[t0], %[c0] \n\t" - "psrlh %[d1], %[t1], %[shift] \n\t" - - "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t" - "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "mov.s %[t1], %[t0] \n\t" - "and %[d2], %[t0], %[c0] \n\t" - "psrlh %[d3], %[t1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d2] \n\t" - "packushb %[d1], %[d1], %[d3] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t" - "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t" - "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t" - "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t" - "daddiu %[dst_u], %[dst_u], 8 \n\t" - "daddiu %[dst_v], %[dst_v], 8 \n\t" - "daddiu %[width], %[width], -16 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]), - [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) - : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), - [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -// Copy row of UYVY Y's (422) into Y (420/422). -void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { - // Output a row of Y values. - uint64_t c0 = 0x00ff00ff00ff00ff; - uint64_t shift = 0x08; - uint64_t temp[2]; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t" - "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t" - "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t" - "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t" - "dsrl %[t0], %[t0], %[shift] \n\t" - "dsrl %[t1], %[t1], %[shift] \n\t" - "and %[t0], %[t0], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "and %[t1], %[t1], %[c0] \n\t" - "packushb %[t0], %[t0], %[t1] \n\t" - "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t" - "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t" - "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t" - "daddiu %[dst_y], %[dst_y], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]) - : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width), - [c0] "f"(c0), [shift] "f"(shift) - : "memory"); -} - -// Blend src_argb over src_argb1 and store to dst_argb. -// dst_argb may be src_argb or src_argb1. -// This code mimics the SSSE3 version for better testability. -void ARGBBlendRow_MMI(const uint8_t* src_argb, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width) { - uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi, - dest_lo; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL; - const uint64_t mask2 = 0x00FF00FF00FF00FFULL; - const uint64_t mask3 = 0xFF; - const uint64_t mask4 = ~mask1; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t" - - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t" - - "psubush %[alpha], %[mask2], %[src0_lo] \n\t" - "pshufh %[alpha], %[alpha], %[mask3] \n\t" - "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t" - - "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t" - "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t" - - "psubush %[alpha], %[mask2], %[src0_hi] \n\t" - "pshufh %[alpha], %[alpha], %[mask3] \n\t" - "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[mask4] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha), - [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), - [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo) - : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1), - [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4), - [shift] "f"(shift), [width] "r"(width) - : "memory"); -} - -void BlendPlaneRow_MMI(const uint8_t* src0, - const uint8_t* src1, - const uint8_t* alpha, - uint8_t* dst, - int width) { - uint64_t source0, source1, dest, alph; - uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi, - dest_lo; - uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL; - const uint64_t mask2 = 0x00FF00FF00FF00FFULL; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t" - "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t" - - "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t" - "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t" - "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t" - - "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t" - "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t" - "psubusb %[alpha_r], %[mask1], %[alpha] \n\t" - "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t" - "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t" - "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t" - "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t" - - "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t" - "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t" - "paddush %[dest_lo], %[dest_lo], %[dest] \n\t" - "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - - "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t" - "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t" - "paddush %[dest_hi], %[dest_hi], %[dest] \n\t" - "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph), - [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo), - [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo), - [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi), - [alpha_r] "=&f"(alpha_rev) - : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha), - [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width) - : "memory"); -} - -// Multiply source RGB by alpha and store to destination. -// This code mimics the SSSE3 version for better testability. -void ARGBAttenuateRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - int width) { - uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha; - const uint64_t mask0 = 0xFF; - const uint64_t mask1 = 0xFF000000FF000000ULL; - const uint64_t mask2 = ~mask1; - const uint64_t shift = 0x08; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[src] \n\t" - "punpckhbh %[src_hi], %[src], %[src] \n\t" - - "pshufh %[alpha], %[src_lo], %[mask0] \n\t" - "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t" - "pshufh %[alpha], %[src_hi], %[mask0] \n\t" - "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "and %[dest], %[dest], %[mask2] \n\t" - "and %[src], %[src], %[mask1] \n\t" - "or %[dest], %[dest], %[src] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift), - [width] "r"(width) - : "memory"); -} - -void ComputeCumulativeSumRow_MMI(const uint8_t* row, - int32_t* cumsum, - const int32_t* previous_cumsum, - int width) { - int64_t row_sum[2] = {0, 0}; - uint64_t src, dest0, dest1, presrc0, presrc1, dest; - const uint64_t mask = 0x0; - - __asm__ volatile( - "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t" - "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t" - - "1: \n\t" - "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t" - "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t" - - "punpcklbh %[src], %[src], %[mask] \n\t" - "punpcklhw %[dest0], %[src], %[mask] \n\t" - "punpckhhw %[dest1], %[src], %[mask] \n\t" - - "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t" - "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t" - - "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t" - "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t" - "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t" - "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t" - - "paddw %[dest0], %[row_sum0], %[presrc0] \n\t" - "paddw %[dest1], %[row_sum1], %[presrc1] \n\t" - - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t" - "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x01 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]), - [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0), - [presrc1] "=&f"(presrc1) - : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum), - [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask) - : "memory"); -} - -// C version 2x2 -> 2x1. -void InterpolateRow_MMI(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction) { - if (source_y_fraction == 0) { - __asm__ volatile( - "1: \n\t" - "ld $t0, 0x0(%[src_ptr]) \n\t" - "sd $t0, 0x0(%[dst_ptr]) \n\t" - "daddiu %[src_ptr], %[src_ptr], 8 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : - : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width) - : "memory"); - return; - } - if (source_y_fraction == 128) { - uint64_t uv = 0x0; - uint64_t uv_stride = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t" - "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t" - "daddu $t0, %[src_ptr], %[stride] \n\t" - "gsldrc1 %[uv_stride], 0x0($t0) \n\t" - "gsldlc1 %[uv_stride], 0x7($t0) \n\t" - - "pavgb %[uv], %[uv], %[uv_stride] \n\t" - "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t" - "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 8 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width), - [stride] "r"((int64_t)src_stride) - : "memory"); - return; - } - const uint8_t* src_ptr1 = src_ptr + src_stride; - uint64_t temp; - uint64_t data[4]; - uint64_t zero = 0x0; - uint64_t c0 = 0x0080008000800080; - uint64_t fy0 = 0x0100010001000100; - uint64_t shift = 0x8; - __asm__ volatile( - "pshufh %[fy1], %[fy1], %[zero] \n\t" - "psubh %[fy0], %[fy0], %[fy1] \n\t" - "1: \n\t" - "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t" - "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t" - "punpcklbh %[d0], %[t0], %[zero] \n\t" - "punpckhbh %[d1], %[t0], %[zero] \n\t" - "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t" - "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t" - "punpcklbh %[d2], %[t0], %[zero] \n\t" - "punpckhbh %[d3], %[t0], %[zero] \n\t" - - "pmullh %[d0], %[d0], %[fy0] \n\t" - "pmullh %[d2], %[d2], %[fy1] \n\t" - "paddh %[d0], %[d0], %[d2] \n\t" - "paddh %[d0], %[d0], %[c0] \n\t" - "psrlh %[d0], %[d0], %[shift] \n\t" - - "pmullh %[d1], %[d1], %[fy0] \n\t" - "pmullh %[d3], %[d3], %[fy1] \n\t" - "paddh %[d1], %[d1], %[d3] \n\t" - "paddh %[d1], %[d1], %[c0] \n\t" - "psrlh %[d1], %[d1], %[shift] \n\t" - - "packushb %[d0], %[d0], %[d1] \n\t" - "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t" - "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t" - "daddiu %[src_ptr], %[src_ptr], 8 \n\t" - "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), - [d2] "=&f"(data[2]), [d3] "=&f"(data[3]) - : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1), - [dst_ptr] "r"(dst_ptr), [width] "r"(width), - [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0), - [shift] "f"(shift), [zero] "f"(zero) - : "memory"); -} - -// Use first 4 shuffler values to reorder ARGB channels. -void ARGBShuffleRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width) { - uint64_t source, dest0, dest1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) | - ((shuffler[2] & 0x03) << 4) | - ((shuffler[3] & 0x03) << 6); - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[dest0], %[src], %[mask0] \n\t" - "pshufh %[dest0], %[dest0], %[mask1] \n\t" - "punpckhbh %[dest1], %[src], %[mask0] \n\t" - "pshufh %[dest1], %[dest1], %[mask1] \n\t" - "packushb %[dest], %[dest0], %[dest1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void I422ToYUY2Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_frame, - int width) { - uint64_t temp[3]; - uint64_t vu = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i] - "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i] - "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i] - "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i] - "punpcklbh %[vu], %[tu], %[tv] \n\t" // g - "punpcklbh %[tu], %[ty], %[vu] \n\t" // g - "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t" - "punpckhbh %[tu], %[ty], %[vu] \n\t" // g - "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t" - "daddiu %[src_y], %[src_y], 8 \n\t" - "daddiu %[src_u], %[src_u], 4 \n\t" - "daddiu %[src_v], %[src_v], 4 \n\t" - "daddiu %[dst_frame], %[dst_frame], 16 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]), - [vu] "=&f"(vu) - : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), - [dst_frame] "r"(dst_frame), [width] "r"(width) - : "memory"); -} - -void I422ToUYVYRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_frame, - int width) { - uint64_t temp[3]; - uint64_t vu = 0x0; - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i] - "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i] - "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i] - "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i] - "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i] - "punpcklbh %[vu], %[tu], %[tv] \n\t" // g - "punpcklbh %[tu], %[vu], %[ty] \n\t" // g - "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t" - "punpckhbh %[tu], %[vu], %[ty] \n\t" // g - "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t" - "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t" - "daddiu %[src_y], %[src_y], 8 \n\t" - "daddiu %[src_u], %[src_u], 4 \n\t" - "daddiu %[src_v], %[src_v], 4 \n\t" - "daddiu %[dst_frame], %[dst_frame], 16 \n\t" - "daddiu %[width], %[width], -8 \n\t" - "bgtz %[width], 1b \n\t" - "nop \n\t" - : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]), - [vu] "=&f"(vu) - : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), - [dst_frame] "r"(dst_frame), [width] "r"(width) - : "memory"); -} - -void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - uint64_t source, dest; - const uint64_t mask0 = 0xff000000ff000000ULL; - const uint64_t mask1 = ~mask0; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "and %[src], %[src], %[mask0] \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[src], %[dest] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(source), [dest] "=&f"(dest) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb, - uint8_t* dst_a, - int width) { - uint64_t src, dest0, dest1, dest_lo, dest_hi, dest; - const uint64_t mask = 0xff000000ff000000ULL; - const uint64_t shift = 0x18; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "and %[dest0], %[src], %[mask] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t" - "and %[dest1], %[src], %[mask] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" - - "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t" - "and %[dest0], %[src], %[mask] \n\t" - "psrlw %[dest0], %[dest0], %[shift] \n\t" - "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t" - "and %[dest1], %[src], %[mask] \n\t" - "psrlw %[dest1], %[dest1], %[shift] \n\t" - "packsswh %[dest_hi], %[dest0], %[dest1] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask), - [shift] "f"(shift), [width] "r"(width) - : "memory"); -} - -void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) { - uint64_t source, dest0, dest1, dest; - const uint64_t mask0 = 0x0; - const uint64_t mask1 = 0x00ffffff00ffffffULL; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[dest0], %[mask0], %[src] \n\t" - "punpcklhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - "punpckhhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "punpckhbh %[dest0], %[mask0], %[src] \n\t" - "punpcklhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t" - "punpckhhw %[dest1], %[mask0], %[dest0] \n\t" - "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" - "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t" - "and %[dest], %[dest], %[mask1] \n\t" - "or %[dest], %[dest], %[dest1] \n\t" - "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0), - [mask1] "f"(mask1), [width] "r"(width) - : "memory"); -} - -void I444ToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - __asm__ volatile ( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub - "or %[ub], %[ub], %[mask] \n\t"//must sign extension - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t"//sign extension - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - "punpcklbh %[u], %[u], %[zero] \n\t"//u - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - "punpcklbh %[v], %[v], %[zero] \n\t"//v - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); -} - -// Also used for 420 -void I422ToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub - "or %[ub], %[ub], %[mask] \n\t"//must sign extension - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t"//sign extension - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t"//v - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); -} - -// 10 bit YUV to ARGB -void I210ToARGBRow_MMI(const uint16_t* src_y, - const uint16_t* src_u, - const uint16_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" - - "1: \n\t" - "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "psllh %[y], %[y], %[six] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "punpcklhw %[u], %[u], %[u] \n\t" - "psrah %[u], %[u], %[two] \n\t" - "punpcklhw %[v], %[v], %[v] \n\t" - "psrah %[v], %[v], %[two] \n\t" - "pminsh %[u], %[u], %[mask1] \n\t" - "pminsh %[v], %[v], %[mask1] \n\t" - - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" - "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), - [u]"=&f"(u), [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [alpha]"f"(-1), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask), [two]"f"(0x02), - [mask1]"f"(0x00ff00ff00ff00ff) - : "memory" - ); -} - -void I422AlphaToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - const uint8_t* src_a, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v,a; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t" - "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb - "packushb %[g_vec0], %[g_vec0], %[a] \n\t" - "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), [a]"=&f"(a), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [a_ptr]"r"(src_a), [zero]"f"(0x00), - [six]"f"(0x6), [five]"f"(0x55), - [mask]"f"(mask) - : "memory" - ); -} - -void I422ToRGB24Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y,u,v; - uint64_t b_vec[2],g_vec[2],r_vec[2]; - uint64_t mask = 0xff00ff00ff00ff00ULL; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec0], %[y], %[bb] \n\t" - "pmullh %[b_vec1], %[u], %[ub] \n\t" - "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t" - "psrah %[b_vec0], %[b_vec0], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec0], %[y], %[bg] \n\t" - "pmullh %[g_vec1], %[u], %[ug] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "pmullh %[g_vec1], %[v], %[vg] \n\t" - "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t" - "psrah %[g_vec0], %[g_vec0], %[six] \n\t" - - "paddsh %[r_vec0], %[y], %[br] \n\t" - "pmullh %[r_vec1], %[v], %[vr] \n\t" - "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t" - "psrah %[r_vec0], %[r_vec0], %[six] \n\t" - - "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" - "packushb %[g_vec0], %[g_vec0], %[zero] \n\t" - "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" - "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" - "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" - - "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t" - "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t" - "or %[g_vec0], %[g_vec0], %[r_vec1] \n\t" - "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t" - "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t" - "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t" - "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t" - "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t" - "pextrh %[r_vec1], %[g_vec1], %[one] \n\t" - "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t" - "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t" - "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t" - "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t" - "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t" - "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t" - - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]), - [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]), - [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask]"f"(mask), - [lmove1]"f"(0x18), [rmove1]"f"(0x8), - [one]"f"(0x1) - : "memory" - ); -} - -void I422ToARGB4444Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb4444, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101 - "pmulhuh %[y], %[y], %[yg] \n\t"//y1 - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t"//u - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "and %[g_vec], %[g_vec], %[mask1] \n\t" - "psrlw %[g_vec], %[g_vec], %[four] \n\t" - "psrlw %[r_vec], %[g_vec], %[four] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "punpcklbh %[r_vec], %[alpha], %[zero] \n\t" - "and %[g_vec], %[g_vec], %[r_vec] \n\t" - - "and %[b_vec], %[b_vec], %[mask1] \n\t" - "psrlw %[b_vec], %[b_vec], %[four] \n\t" - "psrlw %[r_vec], %[b_vec], %[four] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "punpcklbh %[r_vec], %[alpha], %[zero] \n\t" - "and %[b_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[b_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[dst_argb4444]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00), - [four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0), - [alpha]"f"(-1) - : "memory" - ); -} - -void I422ToARGB1555Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb1555, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "psrlw %[temp], %[g_vec], %[three] \n\t" - "and %[g_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "or %[g_vec], %[g_vec], %[mask3] \n\t" - - "psrlw %[temp], %[b_vec], %[three] \n\t" - "and %[b_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "psrlw %[temp], %[temp], %[eight] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "or %[b_vec], %[b_vec], %[mask3] \n\t" - - "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" - "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" - "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [three]"f"(0x3), [mask2]"f"(0x1f0000001f), - [eight]"f"(0x8), [mask3]"f"(0x800000008000), - [lmove5]"f"(0x5) - : "memory" - ); -} - -void I422ToRGB565Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - //u3|u2|u1|u0 --> u1|u1|u0|u0 - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - //v3|v2|v1|v0 --> v1|v1|v0|v0 - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "psrlh %[temp], %[g_vec], %[three] \n\t" - "and %[g_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - - "psrlh %[temp], %[b_vec], %[three] \n\t" - "and %[b_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psllw %[r_vec], %[r_vec], %[lmove5] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - - "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" - "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" - "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [three]"f"(0x3), [mask2]"f"(0x1f0000001f), - [eight]"f"(0x8), [seven]"f"(0x7), - [lmove5]"f"(0x5) - : "memory" - ); -} - -void NV12ToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[vshu] \n\t" - "pshufh %[u], %[u], %[ushu] \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1) - : "memory" - ); -} - -void NV21ToARGBRow_MMI(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[ushu] \n\t" - "pshufh %[u], %[u], %[vshu] \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1) - : "memory" - ); -} - -void NV12ToRGB24Row_MMI(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[vshu] \n\t" - "pshufh %[u], %[u], %[ushu] \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t" - "psllw %[temp], %[r_vec], %[lmove1] \n\t" - "or %[g_vec], %[g_vec], %[temp] \n\t" - "psrlw %[temp], %[r_vec], %[rmove1] \n\t" - "pextrh %[temp], %[temp], %[zero] \n\t" - "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[zero] \n\t" - "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[one] \n\t" - "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t" - "psllw %[b_vec], %[b_vec], %[rmove1] \n\t" - "or %[b_vec], %[b_vec], %[temp] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t" - "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [lmove1]"f"(0x18), - [one]"f"(0x1), [rmove1]"f"(0x8) - : "memory" - ); -} - -void NV21ToRGB24Row_MMI(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[ushu] \n\t" - "pshufh %[u], %[u], %[vshu] \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t" - "psllw %[temp], %[r_vec], %[lmove1] \n\t" - "or %[g_vec], %[g_vec], %[temp] \n\t" - "psrlw %[temp], %[r_vec], %[rmove1] \n\t" - "pextrh %[temp], %[temp], %[zero] \n\t" - "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[zero] \n\t" - "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t" - "pextrh %[temp], %[b_vec], %[one] \n\t" - "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t" - "psllw %[b_vec], %[b_vec], %[rmove1] \n\t" - "or %[b_vec], %[b_vec], %[temp] \n\t" - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t" - "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu), - [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [lmove1]"f"(0x18), [rmove1]"f"(0x8), - [one]"f"(0x1) - : "memory" - ); -} - -void NV12ToRGB565Row_MMI(const uint8_t* src_y, - const uint8_t* src_uv, - uint8_t* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "pshufh %[v], %[u], %[vshu] \n\t" - "pshufh %[u], %[u], %[ushu] \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "psrlh %[temp], %[g_vec], %[three] \n\t" - "and %[g_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psubb %[y], %[eight], %[three] \n\t"//5 - "psllw %[r_vec], %[r_vec], %[y] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[g_vec], %[g_vec], %[r_vec] \n\t" - - "psrlh %[temp], %[b_vec], %[three] \n\t" - "and %[b_vec], %[temp], %[mask2] \n\t" - "psrlw %[temp], %[temp], %[seven] \n\t" - "psrlw %[r_vec], %[mask1], %[eight] \n\t" - "and %[r_vec], %[temp], %[r_vec] \n\t" - "psubb %[y], %[eight], %[three] \n\t"//5 - "psllw %[r_vec], %[r_vec], %[y] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - "paddb %[r_vec], %[three], %[six] \n\t" - "psrlw %[temp], %[temp], %[r_vec] \n\t" - "and %[r_vec], %[temp], %[mask2] \n\t" - "paddb %[temp], %[three], %[eight] \n\t" - "psllw %[r_vec], %[r_vec], %[temp] \n\t" - "or %[b_vec], %[b_vec], %[r_vec] \n\t" - - "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t" - "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t" - "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t" - "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv), - [dst_rgb565]"r"(dst_rgb565), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [three]"f"(0x3), [mask2]"f"(0x1f0000001f), - [eight]"f"(0x8), [seven]"f"(0x7) - : "memory" - ); -} - -void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t" - "psrlh %[temp], %[y], %[eight] \n\t" - "pshufh %[u], %[temp], %[ushu] \n\t" - "pshufh %[v], %[temp], %[vshu] \n\t" - - "psrlh %[temp], %[mask1], %[eight] \n\t" - "and %[y], %[y], %[temp] \n\t" - "psllh %[temp], %[y], %[eight] \n\t" - "or %[y], %[y], %[temp] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [eight]"f"(0x8) - : "memory" - ); -} - -void UYVYToARGBRow_MMI(const uint8_t* src_uyvy, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t" - "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t" - "psrlh %[temp], %[mask1], %[eight] \n\t" - "and %[temp], %[y], %[temp] \n\t" - "pshufh %[u], %[temp], %[ushu] \n\t" - "pshufh %[v], %[temp], %[vshu] \n\t" - - "psrlh %[y], %[y], %[eight] \n\t" - "psllh %[temp], %[y], %[eight] \n\t" - "or %[y], %[y], %[temp] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t" - "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t" - "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [ushu]"f"(0xA0), [vshu]"f"(0xf5), - [alpha]"f"(-1), [eight]"f"(0x8) - : "memory" - ); -} - -void I422ToRGBARow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - uint64_t y, u, v; - uint64_t b_vec, g_vec, r_vec, temp; - uint64_t ub,ug,vg,vr,bb,bg,br,yg; - - __asm__ volatile( - "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" - "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" - "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" - "or %[ub], %[ub], %[mask1] \n\t" - "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" - "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[ug], %[ug], %[zero] \n\t" - "pshufh %[ug], %[ug], %[zero] \n\t" - "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vg], %[vg], %[zero] \n\t" - "pshufh %[vg], %[vg], %[five] \n\t" - "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" - "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" - "punpcklbh %[vr], %[vr], %[zero] \n\t" - "pshufh %[vr], %[vr], %[five] \n\t" - "or %[vr], %[vr], %[mask1] \n\t" - - "1: \n\t" - "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t" - "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t" - "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t" - "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t" - "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t" - "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t" - - "punpcklbh %[y], %[y], %[y] \n\t" - "pmulhuh %[y], %[y], %[yg] \n\t" - - "punpcklbh %[u], %[u], %[u] \n\t" - "punpcklbh %[u], %[u], %[zero] \n\t" - "paddsh %[b_vec], %[y], %[bb] \n\t" - "pmullh %[temp], %[u], %[ub] \n\t" - "psubsh %[b_vec], %[b_vec], %[temp] \n\t" - "psrah %[b_vec], %[b_vec], %[six] \n\t" - - "punpcklbh %[v], %[v], %[v] \n\t" - "punpcklbh %[v], %[v], %[zero] \n\t" - "paddsh %[g_vec], %[y], %[bg] \n\t" - "pmullh %[temp], %[u], %[ug] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "pmullh %[temp], %[v], %[vg] \n\t" - "psubsh %[g_vec], %[g_vec], %[temp] \n\t" - "psrah %[g_vec], %[g_vec], %[six] \n\t" - - "paddsh %[r_vec], %[y], %[br] \n\t" - "pmullh %[temp], %[v], %[vr] \n\t" - "psubsh %[r_vec], %[r_vec], %[temp] \n\t" - "psrah %[r_vec], %[r_vec], %[six] \n\t" - - "packushb %[r_vec], %[b_vec], %[r_vec] \n\t" - "packushb %[g_vec], %[g_vec], %[zero] \n\t" - "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t" - "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t" - "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t" - "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t" - "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t" - - "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t" - "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t" - "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t" - - "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t" - "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t" - "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t" - "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - - : [y]"=&f"(y), [u]"=&f"(u), - [v]"=&f"(v), - [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec), - [r_vec]"=&f"(r_vec), [temp]"=&f"(temp), - [ub]"=&f"(ub), [ug]"=&f"(ug), - [vg]"=&f"(vg), [vr]"=&f"(vr), - [bb]"=&f"(bb), [bg]"=&f"(bg), - [br]"=&f"(br), [yg]"=&f"(yg) - : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u), - [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf), - [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width), - [zero]"f"(0x00), [five]"f"(0x55), - [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00), - [alpha]"f"(-1) - : "memory" - ); -} - -void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) { - __asm__ volatile ( - "punpcklwd %[v32], %[v32], %[v32] \n\t" - "1: \n\t" - "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t" - - "daddi %[width], %[width], -0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "bnez %[width], 1b \n\t" - : [v32]"+&f"(v32) - : [dst_ptr]"r"(dst_argb), [width]"r"(width) - : "memory" - ); -} -// clang-format on - -// 10 bit YUV to ARGB -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/source/row_neon.cc b/source/row_neon.cc index b653765cc..4ed136381 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1753,7 +1753,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb, void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "vdup.32 d7, %2 \n" // dither4 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index ba34ff4c9..74190d611 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1979,7 +1979,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb, void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { asm volatile( "dup v1.4s, %w3 \n" // dither4 diff --git a/source/row_win.cc b/source/row_win.cc index b414d2d0e..5fb28521e 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -895,7 +895,7 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { __asm { @@ -942,7 +942,7 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 __declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, + uint32_t dither4, int width) { __asm { mov eax, [esp + 4] // src_argb diff --git a/source/scale_mmi.cc b/source/scale_mmi.cc deleted file mode 100644 index 1226ef3ea..000000000 --- a/source/scale_mmi.cc +++ /dev/null @@ -1,1168 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/scale.h" - -#include -#include - -#include "libyuv/cpu_id.h" -#include "libyuv/planar_functions.h" // For CopyARGB -#include "libyuv/row.h" -#include "libyuv/scale_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Mips MMI. -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -// clang-format off - -// CPU agnostic row functions -void ScaleRowDown2_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1, dest; - const uint64_t shift = 0x8ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "psrlh %[src0], %[src0], %[shift] \n\t" - - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "psrlh %[src1], %[src1], %[shift] \n\t" - - "packushb %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift] "f"(shift) - : "memory"); -} - -void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest0, dest1; - - const uint64_t mask = 0x00ff00ff00ff00ffULL; - const uint64_t shift = 0x8ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "and %[dest0], %[src0], %[mask] \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "and %[dest1], %[src1], %[mask] \n\t" - "packushb %[dest0], %[dest0], %[dest1] \n\t" - - "psrlh %[src0], %[src0], %[shift] \n\t" - "psrlh %[src1], %[src1], %[shift] \n\t" - "packushb %[dest1], %[src0], %[src1] \n\t" - - "pavgb %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest0] "=&f"(dest0), - [dest1] "=&f"(dest1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [mask] "f"(mask), - [shift] "f"(shift), [width] "r"(dst_width) - : "memory"); -} - -void ScaleRowDown2Box_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - const uint8_t* s = src_ptr; - const uint8_t* t = src_ptr + src_stride; - - uint64_t s0, s1, t0, t1; - uint64_t dest, dest0, dest1; - - const uint64_t ph = 0x0002000200020002ULL; - const uint64_t mask = 0x00ff00ff00ff00ffULL; - const uint64_t shift0 = 0x2ULL; - const uint64_t shift1 = 0x8ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[s0], 0x00(%[s]) \n\t" - "gsldlc1 %[s0], 0x07(%[s]) \n\t" - "psrlh %[s1], %[s0], %[shift1] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x00(%[t]) \n\t" - "gsldlc1 %[t0], 0x07(%[t]) \n\t" - "psrlh %[t1], %[t0], %[shift1] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddh %[dest0], %[s0], %[s1] \n\t" - "paddh %[dest0], %[dest0], %[t0] \n\t" - "paddh %[dest0], %[dest0], %[t1] \n\t" - "paddh %[dest0], %[dest0], %[ph] \n\t" - "psrlh %[dest0], %[dest0], %[shift0] \n\t" - - "gsldrc1 %[s0], 0x08(%[s]) \n\t" - "gsldlc1 %[s0], 0x0f(%[s]) \n\t" - "psrlh %[s1], %[s0], %[shift1] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x08(%[t]) \n\t" - "gsldlc1 %[t0], 0x0f(%[t]) \n\t" - "psrlh %[t1], %[t0], %[shift1] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddh %[dest1], %[s0], %[s1] \n\t" - "paddh %[dest1], %[dest1], %[t0] \n\t" - "paddh %[dest1], %[dest1], %[t1] \n\t" - "paddh %[dest1], %[dest1], %[ph] \n\t" - "psrlh %[dest1], %[dest1], %[shift0] \n\t" - - "packushb %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[s], %[s], 0x10 \n\t" - "daddiu %[t], %[t], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest] "=&f"(dest) - : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph), - [mask] "f"(mask) - : "memory"); -} - -void ScaleARGBRowDown2_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - (void)src_stride; - - const uint32_t* src = (const uint32_t*)(src_argb); - uint32_t* dst = (uint32_t*)(dst_argb); - - uint64_t src0, src1, dest; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "punpckhwd %[dest], %[src0], %[src1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width) - : "memory"); -} - -void ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - __asm__ volatile( - "1: \n\t" - "lwc1 %[src0], 0x00(%[src_ptr]) \n\t" - "lwc1 %[src1], 0x08(%[src_ptr]) \n\t" - "punpcklwd %[dest_lo], %[src0], %[src1] \n\t" - "lwc1 %[src0], 0x04(%[src_ptr]) \n\t" - "lwc1 %[src1], 0x0c(%[src_ptr]) \n\t" - "punpcklwd %[dest_hi], %[src0], %[src1] \n\t" - - "pavgb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width) - : "memory"); -} - -void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - uint8_t* dst_argb, - int dst_width) { - const uint8_t* s = src_argb; - const uint8_t* t = src_argb + src_stride; - - uint64_t s0, s_hi, s_lo; - uint64_t t0, t_hi, t_lo; - uint64_t dest, dest_hi, dest_lo; - - const uint64_t mask = 0x0ULL; - const uint64_t ph = 0x0002000200020002ULL; - const uint64_t shfit = 0x2ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[s0], 0x00(%[s]) \n\t" - "gsldlc1 %[s0], 0x07(%[s]) \n\t" - "punpcklbh %[s_lo], %[s0], %[mask] \n\t" - "punpckhbh %[s_hi], %[s0], %[mask] \n\t" - "paddh %[dest_lo], %[s_lo], %[s_hi] \n\t" - - "gsldrc1 %[t0], 0x00(%[t]) \n\t" - "gsldlc1 %[t0], 0x07(%[t]) \n\t" - "punpcklbh %[t_lo], %[t0], %[mask] \n\t" - "punpckhbh %[t_hi], %[t0], %[mask] \n\t" - "paddh %[dest_lo], %[dest_lo], %[t_lo] \n\t" - "paddh %[dest_lo], %[dest_lo], %[t_hi] \n\t" - - "paddh %[dest_lo], %[dest_lo], %[ph] \n\t" - "psrlh %[dest_lo], %[dest_lo], %[shfit] \n\t" - - "gsldrc1 %[s0], 0x08(%[s]) \n\t" - "gsldlc1 %[s0], 0x0f(%[s]) \n\t" - "punpcklbh %[s_lo], %[s0], %[mask] \n\t" - "punpckhbh %[s_hi], %[s0], %[mask] \n\t" - "paddh %[dest_hi], %[s_lo], %[s_hi] \n\t" - - "gsldrc1 %[t0], 0x08(%[t]) \n\t" - "gsldlc1 %[t0], 0x0f(%[t]) \n\t" - "punpcklbh %[t_lo], %[t0], %[mask] \n\t" - "punpckhbh %[t_hi], %[t0], %[mask] \n\t" - "paddh %[dest_hi], %[dest_hi], %[t_lo] \n\t" - "paddh %[dest_hi], %[dest_hi], %[t_hi] \n\t" - - "paddh %[dest_hi], %[dest_hi], %[ph] \n\t" - "psrlh %[dest_hi], %[dest_hi], %[shfit] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[s], %[s], 0x10 \n\t" - "daddiu %[t], %[t], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [s0] "=&f"(s0), [t0] "=&f"(t0), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), - [t_hi] "=&f"(t_hi), [t_lo] "=&f"(t_lo), [dest] "=&f"(dest) - : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width), - [mask] "f"(mask), [ph] "f"(ph), [shfit] "f"(shfit) - : "memory"); -} - -void ScaleRowDown2_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1, dest; - const uint64_t shift = 0x10ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "psrlw %[src0], %[src0], %[shift] \n\t" - - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "psrlw %[src1], %[src1], %[shift] \n\t" - - "packsswh %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift] "f"(shift) - : "memory"); -} - -void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "punpcklhw %[dest_lo], %[src0], %[src1] \n\t" - "punpckhhw %[dest_hi], %[src0], %[src1] \n\t" - - "punpcklhw %[src0], %[dest_lo], %[dest_hi] \n\t" - "punpckhhw %[src1], %[dest_lo], %[dest_hi] \n\t" - - "pavgh %[dest], %[src0], %[src1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width) - : "memory"); -} - -void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* s = src_ptr; - const uint16_t* t = src_ptr + src_stride; - - uint64_t s0, s1, s_hi, s_lo; - uint64_t t0, t1, t_hi, t_lo; - uint64_t dest, dest0, dest1; - - const uint64_t ph = 0x0000000200000002ULL; - const uint64_t mask = 0x0000ffff0000ffffULL; - const uint64_t shift0 = 0x10ULL; - const uint64_t shift1 = 0x2ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[s0], 0x00(%[s]) \n\t" - "gsldlc1 %[s0], 0x07(%[s]) \n\t" - "psrlw %[s1], %[s0], %[shift0] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x00(%[t]) \n\t" - "gsldlc1 %[t0], 0x07(%[t]) \n\t" - "psrlw %[t1], %[t0], %[shift0] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddw %[dest0], %[s0], %[s1] \n\t" - "paddw %[dest0], %[dest0], %[t0] \n\t" - "paddw %[dest0], %[dest0], %[t1] \n\t" - "paddw %[dest0], %[dest0], %[ph] \n\t" - "psrlw %[dest0], %[dest0], %[shift1] \n\t" - - "gsldrc1 %[s0], 0x08(%[s]) \n\t" - "gsldlc1 %[s0], 0x0f(%[s]) \n\t" - "psrlw %[s1], %[s0], %[shift0] \n\t" - "and %[s0], %[s0], %[mask] \n\t" - - "gsldrc1 %[t0], 0x08(%[t]) \n\t" - "gsldlc1 %[t0], 0x0f(%[t]) \n\t" - "psrlw %[t1], %[t0], %[shift0] \n\t" - "and %[t0], %[t0], %[mask] \n\t" - - "paddw %[dest1], %[s0], %[s1] \n\t" - "paddw %[dest1], %[dest1], %[t0] \n\t" - "paddw %[dest1], %[dest1], %[t1] \n\t" - "paddw %[dest1], %[dest1], %[ph] \n\t" - "psrlw %[dest1], %[dest1], %[shift1] \n\t" - - "packsswh %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[s], %[s], 0x10 \n\t" - "daddiu %[t], %[t], 0x10 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1), - [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), [t_hi] "=&f"(t_hi), - [t_lo] "=&f"(t_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), - [dest] "=&f"(dest) - : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph), - [mask] "f"(mask) - : "memory"); -} - -void ScaleRowDown4_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - const uint64_t shift = 0x10ULL; - const uint64_t mask = 0x000000ff000000ffULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "psrlw %[src0], %[src0], %[shift] \n\t" - "and %[src0], %[src0], %[mask] \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "psrlw %[src1], %[src1], %[shift] \n\t" - "and %[src1], %[src1], %[mask] \n\t" - "packsswh %[dest_lo], %[src0], %[src1] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t" - "psrlw %[src0], %[src0], %[shift] \n\t" - "and %[src0], %[src0], %[mask] \n\t" - "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t" - "psrlw %[src1], %[src1], %[shift] \n\t" - "and %[src1], %[src1], %[mask] \n\t" - "packsswh %[dest_hi], %[src0], %[src1] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [shift] "f"(shift), [mask] "f"(mask) - : "memory"); -} - -void ScaleRowDown4_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1; - uint64_t dest, dest_hi, dest_lo; - - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "punpckhhw %[dest_lo], %[src0], %[src1] \n\t" - "punpcklhw %[dest_lo], %[dest_lo], %[mask] \n\t" - - "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t" - "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t" - "punpckhhw %[dest_hi], %[src0], %[src1] \n\t" - "punpcklhw %[dest_hi], %[dest_hi], %[mask] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi), - [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [mask] "f"(mask) - : "memory"); -} - -#define DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \ - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \ - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - -#define DO_SCALEROWDOWN4BOX_LOOP(reg) \ - "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \ - "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \ - \ - "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_PUNPCKADD() \ - \ - "pmaddhw %[dest_lo], %[dest_lo], %[mask1] \n\t" \ - "pmaddhw %[dest_hi], %[dest_hi], %[mask1] \n\t" \ - "packsswh " #reg ", %[dest_lo], %[dest_hi] \n\t" \ - "pmaddhw " #reg ", " #reg ", %[mask1] \n\t" \ - "paddh " #reg ", " #reg ", %[ph] \n\t" \ - "psrlh " #reg ", " #reg ", %[shift] \n\t" \ - \ - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \ - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \ - "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \ - "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t" - -/* LibYUVScaleTest.ScaleDownBy4_Box */ -void ScaleRowDown4Box_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - const uint8_t* src0_ptr = src_ptr; - const uint8_t* src1_ptr = src_ptr + src_stride; - const uint8_t* src2_ptr = src_ptr + src_stride * 2; - const uint8_t* src3_ptr = src_ptr + src_stride * 3; - - uint64_t src, src_hi, src_lo; - uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3; - - const uint64_t mask0 = 0x0ULL; - const uint64_t mask1 = 0x0001000100010001ULL; - const uint64_t ph = 0x0008000800080008ULL; - const uint64_t shift = 0x4ULL; - - __asm__ volatile( - "1: \n\t" - - DO_SCALEROWDOWN4BOX_LOOP(%[dest0]) - DO_SCALEROWDOWN4BOX_LOOP(%[dest1]) - DO_SCALEROWDOWN4BOX_LOOP(%[dest2]) - DO_SCALEROWDOWN4BOX_LOOP(%[dest3]) - - "packsswh %[dest_lo], %[dest0], %[dest1] \n\t" - "packsswh %[dest_hi], %[dest2], %[dest3] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest) - : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), - [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst), - [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0), - [ph] "f"(ph), [mask1] "f"(mask1) - : "memory"); -} - -#define DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \ - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \ - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - -#define DO_SCALEROWDOWN4BOX_16_LOOP(reg) \ - "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \ - "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \ - "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \ - \ - "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - \ - "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \ - DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \ - \ - "paddw %[dest], %[dest_lo], %[dest_hi] \n\t" \ - "punpckhwd %[dest_hi], %[dest], %[dest] \n\t" \ - "paddw %[dest], %[dest_hi], %[dest] \n\t" \ - "paddw %[dest], %[dest], %[ph] \n\t" \ - "psraw %[dest], %[dest], %[shift] \n\t" \ - "and " #reg ", %[dest], %[mask1] \n\t" \ - \ - "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \ - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \ - "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \ - "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t" - -/* LibYUVScaleTest.ScaleDownBy4_Box_16 */ -void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* src0_ptr = src_ptr; - const uint16_t* src1_ptr = src_ptr + src_stride; - const uint16_t* src2_ptr = src_ptr + src_stride * 2; - const uint16_t* src3_ptr = src_ptr + src_stride * 3; - - uint64_t src, src_hi, src_lo; - uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3; - - const uint64_t mask0 = 0x0ULL; - const uint64_t mask1 = 0x00000000ffffffffULL; - const uint64_t ph = 0x0000000800000008ULL; - const uint64_t shift = 0x04ULL; - - __asm__ volatile( - "1: \n\t" - - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest0]) - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest1]) - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest2]) - DO_SCALEROWDOWN4BOX_16_LOOP(%[dest3]) - "punpcklwd %[dest_lo], %[dest0], %[dest1] \n\t" - "punpcklwd %[dest_hi], %[dest2], %[dest3] \n\t" - - "packushb %[dest], %[dest_lo], %[dest_hi] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2), - [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest) - : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), - [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst), - [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0), - [ph] "f"(ph), [mask1] "f"(mask1) - : "memory"); -} - -// Scales a single row of pixels up by 2x using point sampling. -void ScaleColsUp2_MMI(uint8_t* dst_ptr, - const uint8_t* src_ptr, - int dst_width, - int x, - int dx) { - uint64_t src, dest; - - (void)x; - (void)dx; - - __asm__ volatile( - "1: \n\t" - "lwc1 %[src], 0x00(%[src_ptr]) \n\t" - - "punpcklbh %[dest], %[src], %[src] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width) - : "memory"); -} - -void ScaleColsUp2_16_MMI(uint16_t* dst_ptr, - const uint16_t* src_ptr, - int dst_width, - int x, - int dx) { - uint64_t src, dest; - - (void)x; - (void)dx; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - - "punpcklhw %[dest], %[src], %[src] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "punpckhhw %[dest], %[src], %[src] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src] "=&f"(src), [dest] "=&f"(dest) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width) - : "memory"); -} - -void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { - uint64_t src, src_hi, src_lo, dest0, dest1; - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "1: \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "punpcklbh %[src_lo], %[src], %[mask] \n\t" - "punpckhbh %[src_hi], %[src], %[mask] \n\t" - - "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "paddush %[dest0], %[dest0], %[src_lo] \n\t" - "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "paddush %[dest1], %[dest1], %[src_hi] \n\t" - - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [src] "=&f"(src) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width), - [mask] "f"(mask) - : "memory"); -} - -void ScaleAddRow_16_MMI(const uint16_t* src_ptr, - uint32_t* dst_ptr, - int src_width) { - uint64_t src, src_hi, src_lo, dest0, dest1; - const uint64_t mask = 0x0ULL; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "punpcklhw %[src_lo], %[src], %[mask] \n\t" - "punpckhhw %[src_hi], %[src], %[mask] \n\t" - - "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "paddw %[dest0], %[dest0], %[src_lo] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - - "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "paddw %[dest1], %[dest1], %[src_hi] \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi), - [src_lo] "=&f"(src_lo), [src] "=&f"(src) - : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width), - [mask] "f"(mask) - : "memory"); -} - -void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_argb, - int dst_width) { - (void)src_stride; - - uint64_t src0, src1, dest; - - __asm__ volatile( - "1: \n\t" - "lwc1 %[src0], 0x00(%[src_ptr]) \n\t" - "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t" - "lwc1 %[src1], 0x00(%[src_ptr]) \n\t" - "punpcklwd %[dest], %[src0], %[src1] \n\t" - - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), - [src_stepx_4] "r"(src_stepx << 2), [width] "r"(dst_width) - : "memory"); -} - -void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8_t* dst_argb, - int dst_width) { - const uint8_t* src0_ptr = src_argb; - const uint8_t* src1_ptr = src_argb + src_stride; - - uint64_t src0, src1, src_hi, src_lo; - uint64_t dest, dest_hi, dest_lo, dest0, dest1; - - const uint64_t mask = 0x0ULL; - const uint64_t ph = 0x0002000200020002ULL; - const uint64_t shift = 0x2ULL; - - __asm__ volatile( - "1: \n\t" - - "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[dest_lo], %[src0], %[mask] \n\t" - "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t" - "punpcklbh %[dest_hi], %[src0], %[mask] \n\t" - - "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src_lo], %[src1], %[mask] \n\t" - "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t" - "punpcklbh %[src_hi], %[src1], %[mask] \n\t" - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - "paddh %[dest0], %[dest_hi], %[dest_lo] \n\t" - "paddh %[dest0], %[dest0], %[ph] \n\t" - "psrlh %[dest0], %[dest0], %[shift] \n\t" - - "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t" - "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t" - - "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t" - "punpcklbh %[dest_lo], %[src0], %[mask] \n\t" - "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t" - "punpcklbh %[dest_hi], %[src0], %[mask] \n\t" - - "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t" - "punpcklbh %[src_lo], %[src1], %[mask] \n\t" - "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t" - "punpcklbh %[src_hi], %[src1], %[mask] \n\t" - "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" - "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t" - "paddh %[dest1], %[dest_hi], %[dest_lo] \n\t" - "paddh %[dest1], %[dest1], %[ph] \n\t" - "psrlh %[dest1], %[dest1], %[shift] \n\t" - - "packushb %[dest], %[dest0], %[dest1] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t" - "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t" - "daddi %[width], %[width], -0x02 \n\t" - "bnez %[width], 1b \n\t" - : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), - [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), - [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0), - [src1] "=&f"(src1), [dest] "=&f"(dest) - : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr), - [dst_ptr] "r"(dst_argb), [width] "r"(dst_width), - [src_stepx_4] "r"(src_stepx << 2), [shift] "f"(shift), [mask] "f"(mask), - [ph] "f"(ph) - : "memory"); -} - -// Scales a single row of pixels using point sampling. -void ScaleARGBCols_MMI(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx) { - const uint32_t* src = (const uint32_t*)(src_argb); - uint32_t* dst = (uint32_t*)(dst_argb); - - const uint32_t* src_tmp; - - uint64_t dest, offset; - - const uint64_t shift0 = 16; - const uint64_t shift1 = 2; - - __asm__ volatile( - "1: \n\t" - "srav %[offset], %[x], %[shift0] \n\t" - "sllv %[offset], %[offset], %[shift1] \n\t" - "dadd %[src_tmp], %[src_ptr], %[offset] \n\t" - "lwc1 %[dest], 0x00(%[src_tmp]) \n\t" - "swc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - "dadd %[x], %[x], %[dx] \n\t" - - "daddiu %[dst_ptr], %[dst_ptr], 0x04 \n\t" - "daddi %[width], %[width], -0x01 \n\t" - "bnez %[width], 1b \n\t" - : [dest] "=&f"(dest), [offset] "=&r"(offset), [src_tmp] "=&r"(src_tmp) - : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width), - [dx] "r"(dx), [x] "r"(x), [shift0] "r"(shift0), [shift1] "r"(shift1) - : "memory"); -} - -// Scales a single row of pixels up by 2x using point sampling. -void ScaleARGBColsUp2_MMI(uint8_t* dst_argb, - const uint8_t* src_argb, - int dst_width, - int x, - int dx) { - uint64_t src, dest0, dest1; - (void)x; - (void)dx; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t" - "punpcklwd %[dest0], %[src], %[src] \n\t" - "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t" - "punpckhwd %[dest1], %[src], %[src] \n\t" - "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x04 \n\t" - "bnez %[width], 1b \n\t" - : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src] "=&f"(src) - : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width) - : "memory"); -} - -// Divide num by div and return as 16.16 fixed point result. -/* LibYUVBaseTest.TestFixedDiv */ -int FixedDiv_MIPS(int num, int div) { - int quotient = 0; - const int shift = 16; - - asm( - "dsll %[num], %[num], %[shift] \n\t" - "ddiv %[num], %[div] \t\n" - "mflo %[quo] \t\n" - : [quo] "+&r"(quotient) - : [num] "r"(num), [div] "r"(div), [shift] "r"(shift)); - - return quotient; -} - -// Divide num by div and return as 16.16 fixed point result. -/* LibYUVScaleTest.ARGBScaleTo320x240_Linear */ -int FixedDiv1_MIPS(int num, int div) { - int quotient = 0; - const int shift = 16; - const int val1 = 1; - const int64_t val11 = 0x00010001ULL; - - asm( - "dsll %[num], %[num], %[shift] \n\t" - "dsub %[num], %[num], %[val11] \n\t" - "dsub %[div], %[div], %[val1] \n\t" - "ddiv %[num], %[div] \t\n" - "mflo %[quo] \t\n" - : [quo] "+&r"(quotient) - : [num] "r"(num), [div] "r"(div), [val1] "r"(val1), [val11] "r"(val11), - [shift] "r"(shift)); - - return quotient; -} - -// Read 8x2 upsample with filtering and write 16x1. -// actually reads an extra pixel, so 9x2. -void ScaleRowUp2_16_MMI(const uint16_t* src_ptr, - ptrdiff_t src_stride, - uint16_t* dst, - int dst_width) { - const uint16_t* src2_ptr = src_ptr + src_stride; - - uint64_t src0, src1; - uint64_t dest, dest04, dest15, dest26, dest37; - uint64_t tmp0, tmp1, tmp2, tmp3; - - const uint64_t mask0 = 0x0003000900030009ULL; - const uint64_t mask1 = 0x0001000300010003ULL; - const uint64_t mask2 = 0x0009000300090003ULL; - const uint64_t mask3 = 0x0003000100030001ULL; - const uint64_t ph = 0x0000000800000008ULL; - const uint64_t shift = 4; - - __asm__ volatile( - "1: \n\t" - "gsldrc1 %[src0], 0x00(%[src1_ptr]) \n\t" - "gsldlc1 %[src0], 0x07(%[src1_ptr]) \n\t" - "pmaddhw %[dest04], %[src0], %[mask0] \n\t" - "gsldrc1 %[src1], 0x00(%[src2_ptr]) \n\t" - "gsldlc1 %[src1], 0x07(%[src2_ptr]) \n\t" - "pmaddhw %[dest], %[src1], %[mask1] \n\t" - "paddw %[dest04], %[dest04], %[dest] \n\t" - "paddw %[dest04], %[dest04], %[ph] \n\t" - "psrlw %[dest04], %[dest04], %[shift] \n\t" - - "pmaddhw %[dest15], %[src0], %[mask2] \n\t" - "pmaddhw %[dest], %[src1], %[mask3] \n\t" - "paddw %[dest15], %[dest15], %[dest] \n\t" - "paddw %[dest15], %[dest15], %[ph] \n\t" - "psrlw %[dest15], %[dest15], %[shift] \n\t" - - "gsldrc1 %[src0], 0x02(%[src1_ptr]) \n\t" - "gsldlc1 %[src0], 0x09(%[src1_ptr]) \n\t" - "pmaddhw %[dest26], %[src0], %[mask0] \n\t" - "gsldrc1 %[src1], 0x02(%[src2_ptr]) \n\t" - "gsldlc1 %[src1], 0x09(%[src2_ptr]) \n\t" - "pmaddhw %[dest], %[src1], %[mask1] \n\t" - "paddw %[dest26], %[dest26], %[dest] \n\t" - "paddw %[dest26], %[dest26], %[ph] \n\t" - "psrlw %[dest26], %[dest26], %[shift] \n\t" - - "pmaddhw %[dest37], %[src0], %[mask2] \n\t" - "pmaddhw %[dest], %[src1], %[mask3] \n\t" - "paddw %[dest37], %[dest37], %[dest] \n\t" - "paddw %[dest37], %[dest37], %[ph] \n\t" - "psrlw %[dest37], %[dest37], %[shift] \n\t" - - /* tmp0 = ( 00 04 02 06 ) */ - "packsswh %[tmp0], %[dest04], %[dest26] \n\t" - /* tmp1 = ( 01 05 03 07 ) */ - "packsswh %[tmp1], %[dest15], %[dest37] \n\t" - - /* tmp2 = ( 00 01 04 05 )*/ - "punpcklhw %[tmp2], %[tmp0], %[tmp1] \n\t" - /* tmp3 = ( 02 03 06 07 )*/ - "punpckhhw %[tmp3], %[tmp0], %[tmp1] \n\t" - - /* ( 00 01 02 03 ) */ - "punpcklwd %[dest], %[tmp2], %[tmp3] \n\t" - "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t" - - /* ( 04 05 06 07 ) */ - "punpckhwd %[dest], %[tmp2], %[tmp3] \n\t" - "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t" - "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" - "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x08 \n\t" - "bnez %[width], 1b \n\t" - : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest04] "=&f"(dest04), - [dest15] "=&f"(dest15), [dest26] "=&f"(dest26), [dest37] "=&f"(dest37), - [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2), - [tmp3] "=&f"(tmp3), [dest] "=&f"(dest) - : [src1_ptr] "r"(src_ptr), [src2_ptr] "r"(src2_ptr), [dst_ptr] "r"(dst), - [width] "r"(dst_width), [mask0] "f"(mask0), [mask1] "f"(mask1), - [mask2] "f"(mask2), [mask3] "f"(mask3), [shift] "f"(shift), [ph] "f"(ph) - : "memory"); -} - -void ScaleRowDown34_MMI(const uint8_t* src_ptr, - ptrdiff_t src_stride, - uint8_t* dst, - int dst_width) { - (void)src_stride; - assert((dst_width % 3 == 0) && (dst_width > 0)); - uint64_t src[2]; - uint64_t tmp[2]; - __asm__ volatile ( - "1: \n\t" - "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t" - "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t" - "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t" - "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t" - "and %[tmp1], %[src0], %[mask1] \n\t" - "psrlw %[tmp0], %[src0], %[rmov] \n\t" - "psllw %[tmp0], %[tmp0], %[lmov1] \n\t" - "or %[src0], %[tmp0], %[tmp1] \n\t" - "punpckhwd %[tmp0], %[src0], %[src0] \n\t" - "psllw %[tmp1], %[tmp0], %[rmov] \n\t" - "or %[src0], %[src0], %[tmp1] \n\t" - "psrlw %[tmp0], %[tmp0], %[rmov8] \n\t" - "pextrh %[tmp0], %[tmp0], %[zero] \n\t" - "pinsrh_2 %[src0], %[src0], %[tmp0] \n\t" - "pextrh %[tmp0], %[src1], %[zero] \n\t" - "pinsrh_3 %[src0], %[src0], %[tmp0] \n\t" - - "punpckhwd %[tmp0], %[src1], %[src1] \n\t" - "pextrh %[tmp1], %[tmp0], %[zero] \n\t" - "psrlw %[src1], %[src1], %[rmov] \n\t" - "psllw %[tmp1], %[tmp1], %[rmov8] \n\t" - "or %[src1], %[src1], %[tmp1] \n\t" - "and %[tmp0], %[tmp0], %[mask2] \n\t" - "or %[src1], %[src1], %[tmp0] \n\t" - - "gssdlc1 %[src0], 0x07(%[dst_ptr]) \n\t" - "gssdrc1 %[src0], 0x00(%[dst_ptr]) \n\t" - "gsswlc1 %[src1], 0x0b(%[dst_ptr]) \n\t" - "gsswrc1 %[src1], 0x08(%[dst_ptr]) \n\t" - - "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t" - "daddi %[width], %[width], -0x0c \n\t" - "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t" - "bnez %[width], 1b \n\t" - - : [src0]"=&f"(src[0]), [src1]"=&f"(src[1]), - [tmp0]"=&f"(tmp[0]), [tmp1]"=&f"(tmp[1]) - : [src_ptr]"r"(src_ptr), [dst_ptr]"r"(dst), - [lmov]"f"(0xc), [rmov]"f"(0x18), - [mask1]"f"(0xffff0000ffff), [rmov8]"f"(0x8), - [zero]"f"(0x0), [mask2]"f"(0xff000000), - [width]"r"(dst_width), [lmov1]"f"(0x10) - : "memory" - ); -} -// clang-format on - -#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif