mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Add ARGBToRAWRow_RVV, ARGBToRGB24Row_RVV, RGB24ToARGBRow_RVV
* Run on SiFive internal FPGA:
  - ARGBToRAW_Opt (~1.55x vs scalar)
  - ARGBToRGB24_Opt (~1.44x vs scalar)
  - RGB24ToARGB_Opt (~1.77x vs scalar)
  Test settings: LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10

Bug: libyuv:956
Change-Id: I26722f6848cd68684d95d9a7ee06ce0416e7985d
Signed-off-by: Darren Hsieh <darren.hsieh@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4413083
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
68659d0d68
commit
44396e6e9a
@ -758,9 +758,12 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv)
|
||||
#define HAS_ARGBTORAWROW_RVV
|
||||
#define HAS_ARGBTORGB24ROW_RVV
|
||||
#define HAS_RAWTOARGBROW_RVV
|
||||
#define HAS_RAWTORGBAROW_RVV
|
||||
#define HAS_RAWTORGB24ROW_RVV
|
||||
#define HAS_RAWTORGBAROW_RVV
|
||||
#define HAS_RGB24TOARGBROW_RVV
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||
@ -2961,6 +2964,7 @@ void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||
void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
|
||||
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
@ -3197,6 +3201,9 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
|
||||
uint8_t* dst_rgb,
|
||||
int width);
|
||||
|
||||
void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
|
||||
void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
|
||||
|
||||
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||
|
||||
@ -3049,6 +3049,11 @@ int RGB24ToARGB(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOARGBROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToARGBRow(src_rgb24, dst_argb, width);
|
||||
|
||||
@ -1487,6 +1487,11 @@ int ARGBToRGB24(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToRGB24Row(src_argb, dst_rgb24, width);
|
||||
@ -1561,6 +1566,11 @@ int ARGBToRAW(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORAWROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
ARGBToRAWRow = ARGBToRAWRow_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToRAWRow(src_argb, dst_raw, width);
|
||||
|
||||
@ -30,33 +30,33 @@ extern "C" {
|
||||
void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
|
||||
while (width > 0) {
|
||||
do {
|
||||
vuint8m2_t v_b, v_g, v_r;
|
||||
vl = __riscv_vsetvl_e8m2(width);
|
||||
__riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
|
||||
__riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
|
||||
width -= vl;
|
||||
src_raw += (3 * vl);
|
||||
dst_argb += (4 * vl);
|
||||
}
|
||||
vl = __riscv_vsetvl_e8m2(width);
|
||||
} while (width > 0);
|
||||
}
|
||||
|
||||
void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
|
||||
while (width > 0) {
|
||||
do {
|
||||
vuint8m2_t v_b, v_g, v_r;
|
||||
vl = __riscv_vsetvl_e8m2(width);
|
||||
__riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
|
||||
__riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
|
||||
width -= vl;
|
||||
src_raw += (3 * vl);
|
||||
dst_rgba += (4 * vl);
|
||||
}
|
||||
vl = __riscv_vsetvl_e8m2(width);
|
||||
} while (width > 0);
|
||||
}
|
||||
|
||||
void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
|
||||
while (width > 0) {
|
||||
do {
|
||||
vuint8m2_t v_b, v_g, v_r;
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
__riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl);
|
||||
@ -64,7 +64,49 @@ void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
|
||||
width -= vl;
|
||||
src_raw += (3 * vl);
|
||||
dst_rgb24 += (3 * vl);
|
||||
} while (width > 0);
|
||||
}
|
||||
|
||||
void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
|
||||
do {
|
||||
vuint8m2_t v_b, v_g, v_r, v_a;
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
__riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
|
||||
__riscv_vsseg3e8_v_u8m2(dst_raw, v_r, v_g, v_b, vl);
|
||||
width -= vl;
|
||||
src_argb += (4 * vl);
|
||||
dst_raw += (3 * vl);
|
||||
} while (width > 0);
|
||||
}
|
||||
|
||||
void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
|
||||
uint8_t* dst_rgb24,
|
||||
int width) {
|
||||
do {
|
||||
vuint8m2_t v_b, v_g, v_r, v_a;
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
__riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
|
||||
__riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
|
||||
width -= vl;
|
||||
src_argb += (4 * vl);
|
||||
dst_rgb24 += (3 * vl);
|
||||
} while (width > 0);
|
||||
}
|
||||
|
||||
void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
size_t vl = __riscv_vsetvl_e8m2(width);
|
||||
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
|
||||
do {
|
||||
vuint8m2_t v_b, v_g, v_r;
|
||||
__riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb24, vl);
|
||||
__riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
|
||||
width -= vl;
|
||||
src_rgb24 += (3 * vl);
|
||||
dst_argb += (4 * vl);
|
||||
vl = __riscv_vsetvl_e8m2(width);
|
||||
} while (width > 0);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user