mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Optimized AR64/AB64 <-> ARGB with RVV
* Run on SiFive internal FPGA: ARGBToAR64_Opt (~13.7x vs scalar) ARGBToAB64_Opt (~5.81x vs scalar) AR64ToARGB_Opt (~15.8x vs scalar) AB64ToARGB_Opt (~2.40x vs scalar) LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10 Bug: libyuv:956 Change-Id: Ida642a5077f59d25fb7c5328f671956b2293dadd Signed-off-by: Bruce Lai <bruce.lai@sifive.com> Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4442913 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
77c2121f7e
commit
1330a79e9f
@ -758,6 +758,10 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv)
|
||||
#define HAS_AB64TOARGBROW_RVV
|
||||
#define HAS_AR64TOARGBROW_RVV
|
||||
#define HAS_ARGBTOAB64ROW_RVV
|
||||
#define HAS_ARGBTOAR64ROW_RVV
|
||||
#define HAS_ARGBTORAWROW_RVV
|
||||
#define HAS_ARGBTORGB24ROW_RVV
|
||||
#define HAS_RAWTOARGBROW_RVV
|
||||
@ -3241,6 +3245,10 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
|
||||
void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
|
||||
void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
|
||||
void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
|
||||
void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
|
||||
void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
|
||||
void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
|
||||
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
|
||||
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
@ -3594,6 +3594,11 @@ int AR64ToARGB(const uint16_t* src_ar64,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_AR64TOARGBROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
AR64ToARGBRow(src_ar64, dst_argb, width);
|
||||
@ -3653,6 +3658,11 @@ int AB64ToARGB(const uint16_t* src_ab64,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_AB64TOARGBROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
AB64ToARGBRow(src_ab64, dst_argb, width);
|
||||
|
||||
@ -2751,6 +2751,11 @@ int ARGBToAR64(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAR64ROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToAR64Row(src_argb, dst_ar64, width);
|
||||
@ -2810,6 +2815,11 @@ int ARGBToAB64(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAB64ROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToAB64Row(src_argb, dst_ab64, width);
|
||||
|
||||
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2023 SiFive, Inc. All rights reserved.
|
||||
*
|
||||
* Contributed by Darren Hsieh <darren.hsieh@sifive.com>
|
||||
*
|
||||
* Contributed by Bruce Lai <bruce.lai@sifive.com>
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
@ -27,6 +27,77 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
|
||||
size_t avl = (size_t)4 * width;
|
||||
do {
|
||||
vuint16m8_t v_ar64;
|
||||
vuint8m4_t v_argb;
|
||||
size_t vl = __riscv_vsetvl_e8m4(avl);
|
||||
v_argb = __riscv_vle8_v_u8m4(src_argb, vl);
|
||||
v_ar64 = __riscv_vwaddu_vx_u16m8(v_argb, 0, vl);
|
||||
v_ar64 = __riscv_vmul_vx_u16m8(v_ar64, 0x0101, vl);
|
||||
__riscv_vse16_v_u16m8(dst_ar64, v_ar64, vl);
|
||||
avl -= vl;
|
||||
src_argb += vl;
|
||||
dst_ar64 += vl;
|
||||
} while (avl > 0);
|
||||
}
|
||||
|
||||
void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
|
||||
size_t avl = (size_t)width;
|
||||
do {
|
||||
vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16;
|
||||
vuint8m1_t v_b, v_g, v_r, v_a;
|
||||
size_t vl = __riscv_vsetvl_e8m1(avl);
|
||||
__riscv_vlseg4e8_v_u8m1(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
|
||||
v_b_16 = __riscv_vwaddu_vx_u16m2(v_b, 0, vl);
|
||||
v_g_16 = __riscv_vwaddu_vx_u16m2(v_g, 0, vl);
|
||||
v_r_16 = __riscv_vwaddu_vx_u16m2(v_r, 0, vl);
|
||||
v_a_16 = __riscv_vwaddu_vx_u16m2(v_a, 0, vl);
|
||||
v_b_16 = __riscv_vmul_vx_u16m2(v_b_16, 0x0101, vl);
|
||||
v_g_16 = __riscv_vmul_vx_u16m2(v_g_16, 0x0101, vl);
|
||||
v_r_16 = __riscv_vmul_vx_u16m2(v_r_16, 0x0101, vl);
|
||||
v_a_16 = __riscv_vmul_vx_u16m2(v_a_16, 0x0101, vl);
|
||||
__riscv_vsseg4e16_v_u16m2(dst_ab64, v_r_16, v_g_16, v_b_16, v_a_16, vl);
|
||||
avl -= vl;
|
||||
src_argb += 4 * vl;
|
||||
dst_ab64 += 4 * vl;
|
||||
} while (avl > 0);
|
||||
}
|
||||
|
||||
void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
|
||||
size_t avl = (size_t)4 * width;
|
||||
do {
|
||||
vuint16m8_t v_ar64;
|
||||
vuint8m4_t v_argb;
|
||||
size_t vl = __riscv_vsetvl_e16m8(avl);
|
||||
v_ar64 = __riscv_vle16_v_u16m8(src_ar64, vl);
|
||||
v_argb = __riscv_vnsrl_wx_u8m4(v_ar64, 8, vl);
|
||||
__riscv_vse8_v_u8m4(dst_argb, v_argb, vl);
|
||||
avl -= vl;
|
||||
src_ar64 += vl;
|
||||
dst_argb += vl;
|
||||
} while (avl > 0);
|
||||
}
|
||||
|
||||
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
|
||||
size_t avl = (size_t)width;
|
||||
do {
|
||||
vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16;
|
||||
vuint8m1_t v_b, v_g, v_r, v_a;
|
||||
size_t vl = __riscv_vsetvl_e16m2(avl);
|
||||
__riscv_vlseg4e16_v_u16m2(&v_r_16, &v_g_16, &v_b_16, &v_a_16, src_ab64, vl);
|
||||
v_b = __riscv_vnsrl_wx_u8m1(v_b_16, 8, vl);
|
||||
v_g = __riscv_vnsrl_wx_u8m1(v_g_16, 8, vl);
|
||||
v_r = __riscv_vnsrl_wx_u8m1(v_r_16, 8, vl);
|
||||
v_a = __riscv_vnsrl_wx_u8m1(v_a_16, 8, vl);
|
||||
__riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl);
|
||||
avl -= vl;
|
||||
src_ab64 += 4 * vl;
|
||||
dst_argb += 4 * vl;
|
||||
} while (avl > 0);
|
||||
}
|
||||
|
||||
void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user