mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 09:16:48 +08:00
Refine some functions on the Longarch platform.
Add ARGBToYMatrixRow_LSX/LASX, RGBAToYMatrixRow_LSX/LASX and RGBToYMatrixRow_LSX/LASX functions with RgbConstants argument. Bug: libyuv:912 Change-Id: I956e639d1f0da4a47a55b79c9d41dcd29e29bdc5 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4167860 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Mirko Bonadei <mbonadei@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
0faf8dd0e0
commit
0809713775
@ -686,6 +686,11 @@ extern "C" {
|
||||
#define HAS_SPLITUVROW_LSX
|
||||
#define HAS_UYVYTOARGBROW_LSX
|
||||
#define HAS_YUY2TOARGBROW_LSX
|
||||
#define HAS_ARGBTOYROW_LSX
|
||||
#define HAS_ABGRTOYJROW_LSX
|
||||
#define HAS_RGBATOYJROW_LSX
|
||||
#define HAS_RGB24TOYJROW_LSX
|
||||
#define HAS_RAWTOYJROW_LSX
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
|
||||
@ -713,6 +718,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOUVROW_LASX
|
||||
#define HAS_ARGBTOYJROW_LASX
|
||||
#define HAS_ARGBTOYROW_LASX
|
||||
#define HAS_ABGRTOYJROW_LASX
|
||||
#define HAS_ABGRTOYROW_LASX
|
||||
#define HAS_I422ALPHATOARGBROW_LASX
|
||||
#define HAS_I422TOARGB1555ROW_LASX
|
||||
#define HAS_I422TOARGB4444ROW_LASX
|
||||
@ -742,6 +749,11 @@ extern "C" {
|
||||
#define HAS_YUY2TOUV422ROW_LASX
|
||||
#define HAS_YUY2TOUVROW_LASX
|
||||
#define HAS_YUY2TOYROW_LASX
|
||||
#define HAS_RGBATOYROW_LASX
|
||||
#define HAS_RGBATOYJROW_LASX
|
||||
#define HAS_BGRATOYROW_LASX
|
||||
#define HAS_RGB24TOYJROW_LASX
|
||||
#define HAS_RAWTOYJROW_LASX
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||
@ -1201,9 +1213,14 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
|
||||
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
|
||||
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
|
||||
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
|
||||
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
|
||||
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
|
||||
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
@ -1427,6 +1444,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
||||
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
|
||||
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555,
|
||||
uint8_t* dst_y,
|
||||
int width);
|
||||
@ -1436,6 +1455,11 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
|
||||
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
|
||||
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
|
||||
|
||||
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
@ -1499,10 +1523,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
|
||||
void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
@ -1511,7 +1540,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
@ -1793,6 +1793,14 @@ int ARGBToI420(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1907,13 +1915,21 @@ int BGRAToI420(const uint8_t* src_bgra,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_BGRATOYROW_LASX) && defined(HAS_BGRATOUVROW_LASX)
|
||||
#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
BGRAToYRow = BGRAToYRow_Any_LSX;
|
||||
BGRAToUVRow = BGRAToUVRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
BGRAToYRow = BGRAToYRow_LSX;
|
||||
BGRAToUVRow = BGRAToUVRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_BGRATOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
BGRAToYRow = BGRAToYRow_Any_LASX;
|
||||
BGRAToUVRow = BGRAToUVRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
BGRAToYRow = BGRAToYRow_LASX;
|
||||
BGRAToUVRow = BGRAToUVRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2029,6 +2045,14 @@ int ABGRToI420(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYRow = ABGRToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
|
||||
@ -2125,6 +2149,14 @@ int RGBAToI420(const uint8_t* src_rgba,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
RGBAToYRow = RGBAToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGBAToYRow = RGBAToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
|
||||
@ -2383,6 +2415,22 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RGB24 to ARGB.
|
||||
#else // HAS_RGB24TOYJROW
|
||||
@ -2711,6 +2759,22 @@ int RAWToJ420(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RAW to ARGB.
|
||||
#else // HAS_RAWTOYJROW
|
||||
@ -3298,6 +3362,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -3410,6 +3482,22 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToYJRow(src_rgb24, dst_yj, width);
|
||||
@ -3478,6 +3566,22 @@ int RAWToJ400(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RAWToYJRow(src_raw, dst_yj, width);
|
||||
|
||||
@ -116,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -230,7 +238,14 @@ int ARGBToI422(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -340,6 +355,14 @@ int ARGBToNV12(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -502,6 +525,14 @@ int ARGBToNV21(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -663,6 +694,22 @@ int ABGRToNV12(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYRow = ABGRToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYRow = ABGRToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_SSE2;
|
||||
@ -815,6 +862,22 @@ int ABGRToNV21(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYRow = ABGRToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYRow = ABGRToYRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_SSE2;
|
||||
@ -972,6 +1035,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1135,6 +1206,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1262,6 +1341,14 @@ int ARGBToI400(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_LASX;
|
||||
@ -1939,6 +2026,16 @@ int ARGBToJ420(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LSX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYJRow = ARGBToYJRow_LSX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LASX;
|
||||
@ -2215,6 +2312,22 @@ int RGBAToJ400(const uint8_t* src_rgba,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RGBAToYJRow = RGBAToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGBAToYJRow = RGBAToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGBATOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGBAToYJRow = RGBAToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGBAToYJRow = RGBAToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGBAToYJRow(src_rgba, dst_yj, width);
|
||||
@ -2309,13 +2422,19 @@ int ABGRToJ420(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
|
||||
#if defined(HAS_ABGRTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2430,23 +2549,19 @@ int ABGRToJ422(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
|
||||
#if defined(HAS_ABGRTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LSX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LASX) && defined(HAS_ABGRTOUVJROW_LASX)
|
||||
#if defined(HAS_ABGRTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LASX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LASX;
|
||||
ABGRToUVJRow = ABGRToUVJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2519,6 +2634,22 @@ int ABGRToJ400(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ABGRToYJRow = ABGRToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToYJRow = ABGRToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ABGRToYJRow(src_abgr, dst_yj, width);
|
||||
@ -2713,6 +2844,22 @@ int RAWToJNV21(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LSX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RAW to ARGB.
|
||||
#else // HAS_RAWTOYJROW
|
||||
|
||||
@ -1036,6 +1036,9 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
|
||||
#ifdef HAS_ARGBTOYROW_MSA
|
||||
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_LSX
|
||||
ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_LASX
|
||||
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
@ -1054,9 +1057,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_ARGBTOYJROW_LSX
|
||||
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYJROW_LSX
|
||||
ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYJROW_LSX
|
||||
ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYJROW_LASX
|
||||
ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYJROW_LASX
|
||||
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYJROW_LASX
|
||||
ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOYROW_NEON
|
||||
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
@ -1066,6 +1081,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_BGRATOYROW_LSX
|
||||
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOYROW_LASX
|
||||
ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYROW_NEON
|
||||
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
@ -1075,6 +1093,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
|
||||
#ifdef HAS_ABGRTOYROW_LSX
|
||||
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ABGRTOYROW_LASX
|
||||
ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYROW_NEON
|
||||
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
@ -1084,6 +1105,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_RGBATOYROW_LSX
|
||||
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGBATOYROW_LASX
|
||||
ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYROW_NEON
|
||||
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
|
||||
#endif
|
||||
@ -1102,6 +1126,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
|
||||
#ifdef HAS_RGB24TOYROW_LSX
|
||||
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_LSX
|
||||
ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_LASX
|
||||
ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYROW_LASX
|
||||
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
@ -1126,6 +1156,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
|
||||
#ifdef HAS_RAWTOYROW_LASX
|
||||
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_LSX
|
||||
ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_LASX
|
||||
ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOYROW_NEON
|
||||
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
|
||||
#endif
|
||||
|
||||
@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
|
||||
__m256i tmp0, tmp1, dst0;
|
||||
__m256i const_19 = __lasx_xvldi(0x19);
|
||||
__m256i const_42 = __lasx_xvldi(0x42);
|
||||
__m256i const_81 = __lasx_xvldi(0x81);
|
||||
__m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
|
||||
0x1080108010801080, 0x1080108010801080};
|
||||
__m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
|
||||
0x0000000700000003};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
|
||||
src_argb0, 96, src0, src1, src2, src3);
|
||||
vec0 = __lasx_xvpickev_b(src1, src0);
|
||||
vec1 = __lasx_xvpickev_b(src3, src2);
|
||||
vec2 = __lasx_xvpickod_b(src1, src0);
|
||||
vec3 = __lasx_xvpickod_b(src3, src2);
|
||||
tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
|
||||
tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
|
||||
tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
|
||||
tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
|
||||
tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
|
||||
tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
|
||||
dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);
|
||||
dst0 = __lasx_xvperm_w(dst0, control);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
src_argb0 += 128;
|
||||
dst_y += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
|
||||
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2;
|
||||
__m256i tmp0, tmp1, tmp2, tmp3;
|
||||
__m256i reg0, reg1, reg2, dst0;
|
||||
__m256i const_129 = __lasx_xvldi(129);
|
||||
__m256i const_br = {0x4219421942194219, 0x4219421942194219,
|
||||
0x4219421942194219, 0x4219421942194219};
|
||||
__m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
|
||||
0x1080108010801080, 0x1080108010801080};
|
||||
__m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
|
||||
0x17151412110F0E0C};
|
||||
__m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
|
||||
0x0F0D0C0A09070604};
|
||||
__m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
|
||||
0x001600130010000D};
|
||||
__m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
|
||||
0x000E000B00080005};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
reg0 = __lasx_xvld(src_rgb24, 0);
|
||||
reg1 = __lasx_xvld(src_rgb24, 32);
|
||||
reg2 = __lasx_xvld(src_rgb24, 64);
|
||||
src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
|
||||
src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
|
||||
src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
|
||||
tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
|
||||
tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
|
||||
tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
|
||||
tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
|
||||
reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
|
||||
reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lasx_xvpickod_b(reg1, reg0);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
dst_y += 32;
|
||||
src_rgb24 += 96;
|
||||
}
|
||||
}
|
||||
|
||||
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_u,
|
||||
@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
|
||||
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2;
|
||||
__m256i tmp0, tmp1, tmp2, tmp3;
|
||||
__m256i reg0, reg1, reg2, dst0;
|
||||
__m256i const_129 = __lasx_xvldi(129);
|
||||
__m256i const_br = {0x1942194219421942, 0x1942194219421942,
|
||||
0x1942194219421942, 0x1942194219421942};
|
||||
__m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
|
||||
0x1080108010801080, 0x1080108010801080};
|
||||
__m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
|
||||
0x17151412110F0E0C};
|
||||
__m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
|
||||
0x0F0D0C0A09070604};
|
||||
__m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
|
||||
0x001600130010000D};
|
||||
__m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
|
||||
0x000E000B00080005};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
reg0 = __lasx_xvld(src_raw, 0);
|
||||
reg1 = __lasx_xvld(src_raw, 32);
|
||||
reg2 = __lasx_xvld(src_raw, 64);
|
||||
src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
|
||||
src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
|
||||
src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
|
||||
tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
|
||||
tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
|
||||
tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
|
||||
tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
|
||||
reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
|
||||
reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lasx_xvpickod_b(reg1, reg0);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
dst_y += 32;
|
||||
src_raw += 96;
|
||||
}
|
||||
}
|
||||
|
||||
void RAWToUVRow_LASX(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_u,
|
||||
@ -2118,36 +2000,226 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
int x;
|
||||
int len = width / 32;
|
||||
__m256i src0, src1, src2, src3, dst0;
|
||||
__m256i tmp0, tmp1, tmp2, tmp3;
|
||||
__m256i reg0, reg1;
|
||||
__m256i const_128 = __lasx_xvldi(0x480);
|
||||
__m256i const_150 = __lasx_xvldi(0x96);
|
||||
__m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
|
||||
0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
|
||||
__m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
|
||||
0x0000000700000003};
|
||||
struct RgbConstants {
|
||||
uint8_t kRGBToY[4];
|
||||
uint16_t kAddY;
|
||||
uint16_t pad;
|
||||
};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb,
|
||||
96, src0, src1, src2, src3);
|
||||
tmp0 = __lasx_xvpickev_b(src1, src0);
|
||||
tmp1 = __lasx_xvpickod_b(src1, src0);
|
||||
tmp2 = __lasx_xvpickev_b(src3, src2);
|
||||
tmp3 = __lasx_xvpickod_b(src3, src2);
|
||||
reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);
|
||||
reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150);
|
||||
reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lasx_xvpickod_b(reg1, reg0);
|
||||
dst0 = __lasx_xvperm_w(dst0, shuff);
|
||||
__lasx_xvst(dst0, dst_y, 0);
|
||||
dst_y += 32;
|
||||
src_argb += 128;
|
||||
}
|
||||
// RGB to JPeg coefficients
|
||||
// B * 0.1140 coefficient = 29
|
||||
// G * 0.5870 coefficient = 150
|
||||
// R * 0.2990 coefficient = 77
|
||||
// Add 0.5 = 0x80
|
||||
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
|
||||
128,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
|
||||
|
||||
// RGB to BT.601 coefficients
|
||||
// B * 0.1016 coefficient = 25
|
||||
// G * 0.5078 coefficient = 129
|
||||
// R * 0.2578 coefficient = 66
|
||||
// Add 16.5 = 0x1080
|
||||
|
||||
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
|
||||
static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
|
||||
asm volatile(
|
||||
"xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
|
||||
"xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
|
||||
"xvld $xr20, %4, 0 \n\t" // load shuff
|
||||
"1: \n\t"
|
||||
"xvld $xr4, %0, 0 \n\t"
|
||||
"xvld $xr5, %0, 32 \n\t"
|
||||
"xvld $xr6, %0, 64 \n\t"
|
||||
"xvld $xr7, %0, 96 \n\t" // load 32 pixels of ARGB
|
||||
"xvor.v $xr12, $xr3, $xr3 \n\t"
|
||||
"xvor.v $xr13, $xr3, $xr3 \n\t"
|
||||
"addi.d %2, %2, -32 \n\t" // 32 processed per loop.
|
||||
"xvpickev.b $xr8, $xr5, $xr4 \n\t" //BR
|
||||
"xvpickev.b $xr10, $xr7, $xr6 \n\t"
|
||||
"xvpickod.b $xr9, $xr5, $xr4 \n\t" //GA
|
||||
"xvpickod.b $xr11, $xr7, $xr6 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" //B
|
||||
"xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" //G
|
||||
"xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" //R
|
||||
"xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t"
|
||||
"addi.d %0, %0, 128 \n\t"
|
||||
"xvpickod.b $xr10, $xr13, $xr12 \n\t"
|
||||
"xvperm.w $xr11, $xr10, $xr20 \n\t"
|
||||
"xvst $xr11, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 32 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_argb), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants),
|
||||
"r"(shuff)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from ARGB (memory order B,G,R,A).
void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants);
}

// JPeg full-range luma from ARGB.
void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}

// BT.601 limited-range luma from ABGR; the RAW table swaps the B/R weights.
void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants);
}

// JPeg full-range luma from ABGR.
void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
|
||||
|
||||
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
|
||||
// Same code as ARGB, except the LD4
|
||||
static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
|
||||
asm volatile(
|
||||
"xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
|
||||
"xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
|
||||
"xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
|
||||
"xvld $xr20, %4, 0 \n\t" // load shuff
|
||||
"1: \n\t"
|
||||
"xvld $xr4, %0, 0 \n\t"
|
||||
"xvld $xr5, %0, 32 \n\t"
|
||||
"xvld $xr6, %0, 64 \n\t"
|
||||
"xvld $xr7, %0, 96 \n\t" // load 32 pixels of RGBA
|
||||
"xvor.v $xr12, $xr3, $xr3 \n\t"
|
||||
"xvor.v $xr13, $xr3, $xr3 \n\t"
|
||||
"addi.d %2, %2, -32 \n\t" // 32 processed per loop.
|
||||
"xvpickev.b $xr8, $xr5, $xr4 \n\t" //AG
|
||||
"xvpickev.b $xr10, $xr7, $xr6 \n\t"
|
||||
"xvpickod.b $xr9, $xr5, $xr4 \n\t" //BR
|
||||
"xvpickod.b $xr11, $xr7, $xr6 \n\t"
|
||||
"xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" //B
|
||||
"xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" //G
|
||||
"xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t"
|
||||
"xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" //R
|
||||
"xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t"
|
||||
"addi.d %0, %0, 128 \n\t"
|
||||
"xvpickod.b $xr10, $xr13, $xr12 \n\t"
|
||||
"xvperm.w $xr11, $xr10, $xr20 \n\t"
|
||||
"xvst $xr11, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 32 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_rgba), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants),
|
||||
"r"(shuff)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from RGBA.
void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants);
}

// JPeg full-range luma from RGBA.
void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
  RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}

// BT.601 limited-range luma from BGRA; the RAW table swaps the B/R weights.
void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants);
}
|
||||
|
||||
// Shared worker for 3-byte-per-pixel formats (RGB24 / RAW): expands the
// packed triplets with byte shuffles, then applies the coefficient table.
// Consumes 32 pixels (96 bytes) per iteration; width assumed multiple of 32.
static void RGBToYMatrixRow_LASX(const uint8_t* src_rgb,
                                 uint8_t* dst_y,
                                 int width,
                                 const struct RgbConstants* rgbconstants) {
  // Two pairs of shuffle tables: first half gathers the B,R byte pairs,
  // second half places G into the even byte of each halfword lane.
  int8_t shuf_tab[128] = {
      0,  2, 3,  5, 6,  8, 9,  11, 12, 14, 15, 17, 18, 20, 21, 23,
      0,  2, 3,  5, 6,  8, 9,  11, 12, 14, 15, 17, 18, 20, 21, 23,
      24, 26, 27, 29, 30, 0, 1, 3,  4,  6,  7,  9,  10, 12, 13, 15,
      24, 26, 27, 29, 30, 0, 1, 3,  4,  6,  7,  9,  10, 12, 13, 15,
      1,  0, 4,  0, 7,  0, 10, 0,  13, 0,  16, 0,  19, 0,  22, 0,
      1,  0, 4,  0, 7,  0, 10, 0,  13, 0,  16, 0,  19, 0,  22, 0,
      25, 0, 28, 0, 31, 0, 2,  0,  5,  0,  8,  0,  11, 0,  14, 0,
      25, 0, 28, 0, 31, 0, 2,  0,  5,  0,  8,  0,  11, 0,  14, 0};
  asm volatile(
      // Broadcast the three byte coefficients and the 16-bit bias.
      "xvldrepl.b $xr0, %3, 0 \n\t"
      "xvldrepl.b $xr1, %3, 1 \n\t"
      "xvldrepl.b $xr2, %3, 2 \n\t"
      "xvldrepl.h $xr3, %3, 4 \n\t"
      "xvld $xr4, %4, 0 \n\t"  // shuffle tables
      "xvld $xr5, %4, 32 \n\t"
      "xvld $xr6, %4, 64 \n\t"
      "xvld $xr7, %4, 96 \n\t"
      "1: \n\t"
      "xvld $xr8, %0, 0 \n\t"
      "xvld $xr9, %0, 32 \n\t"
      "xvld $xr10, %0, 64 \n\t"  // 32 pixels of packed RGB
      "xvor.v $xr12, $xr3, $xr3 \n\t"  // seed accumulators with bias
      "xvor.v $xr13, $xr3, $xr3 \n\t"
      "xvor.v $xr11, $xr9, $xr9 \n\t"
      "addi.d %2, %2, -32 \n\t"  // 32 pixels per iteration
      "xvpermi.q $xr9, $xr8, 0x30 \n\t"  // align 96 bytes across lanes
      "xvpermi.q $xr8, $xr10, 0x03 \n\t"
      "xvpermi.q $xr10, $xr11, 0x30 \n\t"
      "xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t"   // B,R pairs (lo)
      "xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t"  // B,R pairs (hi)
      "xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t"   // G lanes (lo)
      "xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t"  // G lanes (hi)
      "xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t"  // += G * coeff
      "xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t"
      "xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t"  // += B * coeff
      "xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t"
      "xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t"  // += R * coeff
      "xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t"
      "addi.d %0, %0, 96 \n\t"
      "xvpickod.b $xr10, $xr13, $xr12 \n\t"  // high bytes == >> 8
      "xvst $xr10, %1, 0 \n\t"
      "addi.d %1, %1, 32 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgb),  // %0
        "+&r"(dst_y),    // %1
        "+&r"(width)     // %2
      : "r"(rgbconstants),  // %3
        "r"(shuf_tab)       // %4
      : "memory");
}
|
||||
|
||||
// JPeg full-range luma from packed RGB24 (memory order B,G,R).
void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}

// JPeg full-range luma from RAW (memory order R,G,B).
void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants);
}

// BT.601 limited-range luma from RGB24.
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants);
}

// BT.601 limited-range luma from RAW.
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants);
}
|
||||
|
||||
void ARGBToUVJRow_LASX(const uint8_t* src_argb,
|
||||
|
||||
@ -561,39 +561,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from packed RGB24 (memory order B,G,R):
// Y = (B*25 + G*129 + R*66 + 0x1080) >> 8, 16 pixels per iteration.
// Width is assumed to be a multiple of 16.
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  int i;
  int count = width / 16;
  __m128i src0, src1, src2;
  __m128i vec0, vec1, vec2, vec3;
  __m128i acc0, acc1, out;
  __m128i coeff_g = __lsx_vldi(129);
  // Byte pairs {B:0x19=25, R:0x42=66} for the dot-product accumulate.
  __m128i coeff_br = {0x4219421942194219, 0x4219421942194219};
  __m128i bias = {0x1080108010801080, 0x1080108010801080};  // 16.5 in 8.8
  // shuff0/1 gather the B,R byte pairs; shuff2/3 spread G over halfwords.
  __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
  __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
  __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
  __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};

  for (i = 0; i < count; i++) {
    src0 = __lsx_vld(src_rgb24, 0);
    src1 = __lsx_vld(src_rgb24, 16);
    src2 = __lsx_vld(src_rgb24, 32);
    vec0 = __lsx_vshuf_b(src1, src0, shuff0);
    vec1 = __lsx_vshuf_b(src1, src2, shuff1);
    vec2 = __lsx_vshuf_b(src1, src0, shuff2);
    vec3 = __lsx_vshuf_b(src1, src2, shuff3);
    acc0 = __lsx_vmaddwev_h_bu(bias, vec2, coeff_g);  // bias + G*129
    acc1 = __lsx_vmaddwev_h_bu(bias, vec3, coeff_g);
    acc0 = __lsx_vdp2add_h_bu(acc0, coeff_br, vec0);  // += B*25 + R*66
    acc1 = __lsx_vdp2add_h_bu(acc1, coeff_br, vec1);
    out = __lsx_vpickod_b(acc1, acc0);  // high byte of each halfword == >> 8
    __lsx_vst(out, dst_y, 0);
    dst_y += 16;
    src_rgb24 += 48;
  }
}
|
||||
|
||||
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_u,
|
||||
@ -647,39 +614,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from RAW (memory order R,G,B):
// Y = (R*66 + G*129 + B*25 + 0x1080) >> 8, 16 pixels per iteration.
// Width is assumed to be a multiple of 16.
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  int i;
  int count = width / 16;
  __m128i src0, src1, src2;
  __m128i vec0, vec1, vec2, vec3;
  __m128i acc0, acc1, out;
  __m128i coeff_g = __lsx_vldi(129);
  // Byte pairs {R:0x42=66, B:0x19=25} (RAW stores R first).
  __m128i coeff_rb = {0x1942194219421942, 0x1942194219421942};
  __m128i bias = {0x1080108010801080, 0x1080108010801080};  // 16.5 in 8.8
  // shuff0/1 gather the R,B byte pairs; shuff2/3 spread G over halfwords.
  __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
  __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
  __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
  __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};

  for (i = 0; i < count; i++) {
    src0 = __lsx_vld(src_raw, 0);
    src1 = __lsx_vld(src_raw, 16);
    src2 = __lsx_vld(src_raw, 32);
    vec0 = __lsx_vshuf_b(src1, src0, shuff0);
    vec1 = __lsx_vshuf_b(src1, src2, shuff1);
    vec2 = __lsx_vshuf_b(src1, src0, shuff2);
    vec3 = __lsx_vshuf_b(src1, src2, shuff3);
    acc0 = __lsx_vmaddwev_h_bu(bias, vec2, coeff_g);   // bias + G*129
    acc1 = __lsx_vmaddwev_h_bu(bias, vec3, coeff_g);
    acc0 = __lsx_vdp2add_h_bu(acc0, coeff_rb, vec0);   // += R*66 + B*25
    acc1 = __lsx_vdp2add_h_bu(acc1, coeff_rb, vec1);
    out = __lsx_vsrlni_b_h(acc1, acc0, 8);  // narrow with >> 8
    __lsx_vst(out, dst_y, 0);
    dst_y += 16;
    src_raw += 48;
  }
}
|
||||
|
||||
void RAWToUVRow_LSX(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_u,
|
||||
@ -914,62 +848,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx,
|
||||
}
|
||||
}
|
||||
|
||||
// JPeg full-range luma from 16 ARGB pixels (memory order B,G,R,A) per
// iteration: Y = (B*29 + G*150 + R*77 + 0x80) >> 8.
// Width is assumed to be a multiple of 16.
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  int i;
  int count = width / 16;
  __m128i src0, src1, src2, src3, out;
  __m128i vec0, vec1, vec2, vec3;
  __m128i acc0, acc1;
  __m128i round_128 = __lsx_vldi(0x480);  // halfword lanes of 0x0080
  __m128i coeff_g = __lsx_vldi(0x96);     // 150
  // Byte pairs {B:0x1D=29, R:0x4D=77}.
  __m128i coeff_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};

  for (i = 0; i < count; i++) {
    DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
              src0, src1, src2, src3);
    vec0 = __lsx_vpickev_b(src1, src0);  // even bytes: B,R
    vec1 = __lsx_vpickod_b(src1, src0);  // odd bytes: G,A
    vec2 = __lsx_vpickev_b(src3, src2);
    vec3 = __lsx_vpickod_b(src3, src2);
    acc0 = __lsx_vmaddwev_h_bu(round_128, vec1, coeff_g);  // 0x80 + G*150
    acc1 = __lsx_vmaddwev_h_bu(round_128, vec3, coeff_g);
    acc0 = __lsx_vdp2add_h_bu(acc0, coeff_br, vec0);  // += B*29 + R*77
    acc1 = __lsx_vdp2add_h_bu(acc1, coeff_br, vec2);
    out = __lsx_vpickod_b(acc1, acc0);  // high byte of each halfword == >> 8
    __lsx_vst(out, dst_y, 0);
    dst_y += 16;
    src_argb += 64;
  }
}
|
||||
|
||||
// BT.601 limited-range luma from 16 BGRA pixels (memory order A,R,G,B) per
// iteration: Y = (R*66 + G*129 + B*25 + 0x1080) >> 8.
// Width is assumed to be a multiple of 16.
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  int i;
  int count = width / 16;
  __m128i src0, src1, src2, src3, out;
  __m128i vec0, vec1, vec2, vec3;
  __m128i acc0, acc1;
  __m128i coeff_g = __lsx_vldi(0x81);  // 129
  // Byte pairs {R:0x42=66, B:0x19=25}.
  __m128i coeff_rb = {0x1942194219421942, 0x1942194219421942};
  __m128i bias = {0x1080108010801080, 0x1080108010801080};  // 16.5 in 8.8

  for (i = 0; i < count; i++) {
    DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
              src0, src1, src2, src3);
    vec0 = __lsx_vpickod_b(src1, src0);  // odd bytes: R,B
    vec1 = __lsx_vpickev_b(src1, src0);  // even bytes: A,G
    vec2 = __lsx_vpickod_b(src3, src2);
    vec3 = __lsx_vpickev_b(src3, src2);
    acc0 = __lsx_vmaddwod_h_bu(bias, vec1, coeff_g);  // bias + G*129
    acc1 = __lsx_vmaddwod_h_bu(bias, vec3, coeff_g);
    acc0 = __lsx_vdp2add_h_bu(acc0, coeff_rb, vec0);  // += R*66 + B*25
    acc1 = __lsx_vdp2add_h_bu(acc1, coeff_rb, vec2);
    out = __lsx_vsrlni_b_h(acc1, acc0, 8);  // narrow with >> 8
    __lsx_vst(out, dst_y, 0);
    dst_y += 16;
    src_bgra += 64;
  }
}
|
||||
|
||||
void BGRAToUVRow_LSX(const uint8_t* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8_t* dst_u,
|
||||
@ -1018,34 +896,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
|
||||
}
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from 16 ABGR pixels (memory order R,G,B,A) per
// iteration: Y = (R*66 + G*129 + B*25 + 0x1080) >> 8.
// Width is assumed to be a multiple of 16.
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  int i;
  int count = width / 16;
  __m128i src0, src1, src2, src3, out;
  __m128i vec0, vec1, vec2, vec3;
  __m128i acc0, acc1;
  __m128i coeff_g = __lsx_vldi(0x81);  // 129
  // Byte pairs {R:0x42=66, B:0x19=25}.
  __m128i coeff_rb = {0x1942194219421942, 0x1942194219421942};
  __m128i bias = {0x1080108010801080, 0x1080108010801080};  // 16.5 in 8.8

  for (i = 0; i < count; i++) {
    DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
              src0, src1, src2, src3);
    vec0 = __lsx_vpickev_b(src1, src0);  // even bytes: R,B
    vec1 = __lsx_vpickod_b(src1, src0);  // odd bytes: G,A
    vec2 = __lsx_vpickev_b(src3, src2);
    vec3 = __lsx_vpickod_b(src3, src2);
    acc0 = __lsx_vmaddwev_h_bu(bias, vec1, coeff_g);  // bias + G*129
    acc1 = __lsx_vmaddwev_h_bu(bias, vec3, coeff_g);
    acc0 = __lsx_vdp2add_h_bu(acc0, coeff_rb, vec0);  // += R*66 + B*25
    acc1 = __lsx_vdp2add_h_bu(acc1, coeff_rb, vec2);
    out = __lsx_vsrlni_b_h(acc1, acc0, 8);  // narrow with >> 8
    __lsx_vst(out, dst_y, 0);
    dst_y += 16;
    src_abgr += 64;
  }
}
|
||||
|
||||
void ABGRToUVRow_LSX(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_u,
|
||||
@ -1094,34 +944,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
|
||||
}
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from 16 RGBA pixels (memory order A,B,G,R) per
// iteration: Y = (B*25 + G*129 + R*66 + 0x1080) >> 8.
// Width is assumed to be a multiple of 16.
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  int i;
  int count = width / 16;
  __m128i src0, src1, src2, src3, out;
  __m128i vec0, vec1, vec2, vec3;
  __m128i acc0, acc1;
  __m128i coeff_g = __lsx_vldi(0x81);  // 129
  // Byte pairs {B:0x19=25, R:0x42=66}.
  __m128i coeff_br = {0x4219421942194219, 0x4219421942194219};
  __m128i bias = {0x1080108010801080, 0x1080108010801080};  // 16.5 in 8.8

  for (i = 0; i < count; i++) {
    DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
              src0, src1, src2, src3);
    vec0 = __lsx_vpickod_b(src1, src0);  // odd bytes: B,R
    vec1 = __lsx_vpickev_b(src1, src0);  // even bytes: A,G
    vec2 = __lsx_vpickod_b(src3, src2);
    vec3 = __lsx_vpickev_b(src3, src2);
    acc0 = __lsx_vmaddwod_h_bu(bias, vec1, coeff_g);  // bias + G*129
    acc1 = __lsx_vmaddwod_h_bu(bias, vec3, coeff_g);
    acc0 = __lsx_vdp2add_h_bu(acc0, coeff_br, vec0);  // += B*25 + R*66
    acc1 = __lsx_vdp2add_h_bu(acc1, coeff_br, vec2);
    out = __lsx_vsrlni_b_h(acc1, acc0, 8);  // narrow with >> 8
    __lsx_vst(out, dst_y, 0);
    dst_y += 16;
    src_rgba += 64;
  }
}
|
||||
|
||||
void RGBAToUVRow_LSX(const uint8_t* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8_t* dst_u,
|
||||
@ -1821,6 +1643,212 @@ void HalfFloatRow_LSX(const uint16_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
struct RgbConstants {
|
||||
uint8_t kRGBToY[4];
|
||||
uint16_t kAddY;
|
||||
uint16_t pad;
|
||||
};
|
||||
|
||||
// RGB to JPeg coefficients
|
||||
// B * 0.1140 coefficient = 29
|
||||
// G * 0.5870 coefficient = 150
|
||||
// R * 0.2990 coefficient = 77
|
||||
// Add 0.5 = 0x80
|
||||
static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
|
||||
128,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
|
||||
|
||||
// RGB to BT.601 coefficients
|
||||
// B * 0.1016 coefficient = 25
|
||||
// G * 0.5078 coefficient = 129
|
||||
// R * 0.2578 coefficient = 66
|
||||
// Add 16.5 = 0x1080
|
||||
|
||||
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
|
||||
0x1080,
|
||||
0};
|
||||
|
||||
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
|
||||
static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
asm volatile(
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
"1: \n\t"
|
||||
"vld $vr4, %0, 0 \n\t"
|
||||
"vld $vr5, %0, 16 \n\t"
|
||||
"vld $vr6, %0, 32 \n\t"
|
||||
"vld $vr7, %0, 48 \n\t" // load 16 pixels of ARGB
|
||||
"vor.v $vr12, $vr3, $vr3 \n\t"
|
||||
"vor.v $vr13, $vr3, $vr3 \n\t"
|
||||
"addi.d %2, %2, -16 \n\t" // 16 processed per loop.
|
||||
"vpickev.b $vr8, $vr5, $vr4 \n\t" //BR
|
||||
"vpickev.b $vr10, $vr7, $vr6 \n\t"
|
||||
"vpickod.b $vr9, $vr5, $vr4 \n\t" //GA
|
||||
"vpickod.b $vr11, $vr7, $vr6 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" //B
|
||||
"vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" //G
|
||||
"vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" //R
|
||||
"vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t"
|
||||
"addi.d %0, %0, 64 \n\t"
|
||||
"vpickod.b $vr10, $vr13, $vr12 \n\t"
|
||||
"vst $vr10, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 16 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_argb), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from ARGB (memory order B,G,R,A).
void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants);
}

// JPeg full-range luma from ARGB.
void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}

// BT.601 limited-range luma from ABGR; the RAW table swaps the B/R weights.
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants);
}

// JPeg full-range luma from ABGR.
void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
  ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
|
||||
|
||||
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
|
||||
// Same code as ARGB, except the LD4
|
||||
static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
asm volatile(
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
"1: \n\t"
|
||||
"vld $vr4, %0, 0 \n\t"
|
||||
"vld $vr5, %0, 16 \n\t"
|
||||
"vld $vr6, %0, 32 \n\t"
|
||||
"vld $vr7, %0, 48 \n\t" // load 16 pixels of RGBA
|
||||
"vor.v $vr12, $vr3, $vr3 \n\t"
|
||||
"vor.v $vr13, $vr3, $vr3 \n\t"
|
||||
"addi.d %2, %2, -16 \n\t" // 16 processed per loop.
|
||||
"vpickev.b $vr8, $vr5, $vr4 \n\t" //AG
|
||||
"vpickev.b $vr10, $vr7, $vr6 \n\t"
|
||||
"vpickod.b $vr9, $vr5, $vr4 \n\t" //BR
|
||||
"vpickod.b $vr11, $vr7, $vr6 \n\t"
|
||||
"vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" //B
|
||||
"vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" //G
|
||||
"vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t"
|
||||
"vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" //R
|
||||
"vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t"
|
||||
"addi.d %0, %0, 64 \n\t"
|
||||
"vpickod.b $vr10, $vr13, $vr12 \n\t"
|
||||
"vst $vr10, %1, 0 \n\t"
|
||||
"addi.d %1, %1, 16 \n\t"
|
||||
"bnez %2, 1b \n\t"
|
||||
: "+&r"(src_rgba), // %0
|
||||
"+&r"(dst_y), // %1
|
||||
"+&r"(width) // %2
|
||||
: "r"(rgbconstants)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
// BT.601 limited-range luma from RGBA.
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants);
}

// JPeg full-range luma from RGBA.
void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
  RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}

// BT.601 limited-range luma from BGRA; the RAW table swaps the B/R weights.
void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
  RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants);
}
|
||||
|
||||
// Shared worker for 3-byte-per-pixel formats (RGB24 / RAW): expands the
// packed triplets with byte shuffles, then applies the coefficient table.
// Consumes 16 pixels (48 bytes) per iteration; width assumed multiple of 16.
static void RGBToYMatrixRow_LSX(const uint8_t* src_rgb,
                                uint8_t* dst_y,
                                int width,
                                const struct RgbConstants* rgbconstants) {
  // First two rows gather the B,R byte pairs; last two place G into the
  // even byte of each halfword lane.
  int8_t shuf_tab[64] = {
      0,  2, 3,  5, 6,  8, 9,  11, 12, 14, 15, 17, 18, 20, 21, 23,
      24, 26, 27, 29, 30, 0, 1, 3,  4,  6,  7,  9,  10, 12, 13, 15,
      1,  0, 4,  0, 7,  0, 10, 0,  13, 0,  16, 0,  19, 0,  22, 0,
      25, 0, 28, 0, 31, 0, 2,  0,  5,  0,  8,  0,  11, 0,  14, 0};
  asm volatile(
      // Broadcast the three byte coefficients and the 16-bit bias.
      "vldrepl.b $vr0, %3, 0 \n\t"
      "vldrepl.b $vr1, %3, 1 \n\t"
      "vldrepl.b $vr2, %3, 2 \n\t"
      "vldrepl.h $vr3, %3, 4 \n\t"
      "vld $vr4, %4, 0 \n\t"  // shuffle tables
      "vld $vr5, %4, 16 \n\t"
      "vld $vr6, %4, 32 \n\t"
      "vld $vr7, %4, 48 \n\t"
      "1: \n\t"
      "vld $vr8, %0, 0 \n\t"
      "vld $vr9, %0, 16 \n\t"
      "vld $vr10, %0, 32 \n\t"  // 16 pixels of packed RGB
      "vor.v $vr12, $vr3, $vr3 \n\t"  // seed accumulators with bias
      "vor.v $vr13, $vr3, $vr3 \n\t"
      "addi.d %2, %2, -16 \n\t"  // 16 pixels per iteration
      "vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t"   // B,R pairs (lo)
      "vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t"  // B,R pairs (hi)
      "vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t"   // G lanes (lo)
      "vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t"  // G lanes (hi)
      "vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t"  // += G * coeff
      "vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t"
      "vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t"  // += B * coeff
      "vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t"
      "vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t"  // += R * coeff
      "vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t"
      "addi.d %0, %0, 48 \n\t"
      "vpickod.b $vr10, $vr13, $vr12 \n\t"  // high bytes == >> 8
      "vst $vr10, %1, 0 \n\t"
      "addi.d %1, %1, 16 \n\t"
      "bnez %2, 1b \n\t"
      : "+&r"(src_rgb),  // %0
        "+&r"(dst_y),    // %1
        "+&r"(width)     // %2
      : "r"(rgbconstants),  // %3
        "r"(shuf_tab)       // %4
      : "memory");
}
|
||||
|
||||
// JPeg full-range luma from packed RGB24 (memory order B,G,R).
void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}

// JPeg full-range luma from RAW (memory order R,G,B).
void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants);
}

// BT.601 limited-range luma from RGB24.
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants);
}

// BT.601 limited-range luma from RAW.
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
  RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user