From e8af6cb2e4e6f2c3a1936cdee2e97f4e39f416f6 Mon Sep 17 00:00:00 2001 From: Darren Hsieh Date: Sat, 1 Apr 2023 07:28:00 -0700 Subject: [PATCH] Add RAWToARGBRow_RVV,RAWToRGBARow_RVV,RAWToRGB24Row_RVV * Run on SiFive internal FPGA: RAWToARGB_Opt (~2x vs scalar) RAWToRGBA_Opt (~2x vs scalar) RAWToRGB24_Opt (~1.5x vs scalar) LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10 Change-Id: I21a13d646589ea2aa3822cb9225f5191068c285b Signed-off-by: Darren Hsieh Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4408357 Reviewed-by: Frank Barchard Commit-Queue: Frank Barchard --- README.chromium | 2 +- README.md | 1 + include/libyuv/row.h | 9 +++++ include/libyuv/version.h | 2 +- source/convert_argb.cc | 10 +++++ source/planar_functions.cc | 5 +++ source/row_rvv.cc | 75 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 source/row_rvv.cc diff --git a/README.chromium b/README.chromium index a73cecf87..16398820f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1864 +Version: 1865 License: BSD License File: LICENSE diff --git a/README.md b/README.md index db70b7f08..95eeb04c8 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ * Optimized for SSSE3/AVX2 on x86/x64. * Optimized for Neon on Arm. * Optimized for MSA on Mips. +* Optimized for RVV on RISC-V. ### Development diff --git a/include/libyuv/row.h b/include/libyuv/row.h index ff6ffe47c..08004c0cc 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -757,6 +757,12 @@ extern "C" { #define HAS_RAWTOYJROW_LASX #endif +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv) +#define HAS_RAWTOARGBROW_RVV +#define HAS_RAWTORGBAROW_RVV +#define HAS_RAWTORGB24ROW_RVV +#endif + #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) #if defined(VISUALC_HAS_AVX2) #define SIMD_ALIGNED(var) __declspec(align(32)) var @@ -2960,9 +2966,12 @@ void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 56c41fe5b..c2b342ef9 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1864 +#define LIBYUV_VERSION 1865 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 64425c596..e25ecefa9 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -3124,6 +3124,11 @@ int RAWToARGB(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToARGBRow = RAWToARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToARGBRow(src_raw, dst_argb, width); @@ -3175,6 +3180,11 @@ int RAWToRGBA(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTORGBAROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToRGBARow = RAWToRGBARow_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToRGBARow(src_raw, dst_rgba, width); diff --git a/source/planar_functions.cc b/source/planar_functions.cc index e3452f58e..b5a2e1a03 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -3234,6 +3234,11 @@ int RAWToRGB24(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToRGB24Row = RAWToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToRGB24Row(src_raw, dst_rgb24, width); diff --git a/source/row_rvv.cc b/source/row_rvv.cc new file mode 100644 index 000000000..0f264d349 --- /dev/null +++ b/source/row_rvv.cc @@ -0,0 +1,75 @@ +/* + * Copyright 2023 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Copyright (c) 2023 SiFive, Inc. All rights reserved. + * + * Contributed by Darren Hsieh + * + */ + +#include + +#include "libyuv/row.h" + +#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv) +#include + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + size_t vl = __riscv_vsetvl_e8m2(width); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + while (width > 0) { + vuint8m2_t v_b, v_g, v_r; + vl = __riscv_vsetvl_e8m2(width); + __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl); + __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); + width -= vl; + src_raw += (3 * vl); + dst_argb += (4 * vl); + } +} + +void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { + size_t vl = __riscv_vsetvl_e8m2(width); + vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl); + while (width > 0) { + vuint8m2_t v_b, v_g, v_r; + vl = __riscv_vsetvl_e8m2(width); + __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl); + __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl); + width -= vl; + src_raw += (3 * vl); + dst_rgba += (4 * vl); + } +} + +void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { + while (width > 0) { + vuint8m2_t v_b, v_g, v_r; + size_t vl = __riscv_vsetvl_e8m2(width); + __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl); + __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_r, v_g, v_b, vl); + width -= vl; + src_raw += (3 * vl); + dst_rgb24 += (3 * vl); + } +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv)