mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Enable {J400/I400}ToARGBRow_RVV
Run on SiFive internal FPGA*: I400ToARGB_Opt (~8x vs scalar) J400ToARGB_Opt (~10x vs scalar) LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=10 Bug: libyuv:956, libyuv:961 Change-Id: If4e21ec85c4ff79083ec16a6faae0e457129a8de Signed-off-by: Bruce Lai <bruce.lai@sifive.com> Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4544972 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
8670bcf17f
commit
179b0203e5
@ -807,6 +807,7 @@ extern "C" {
|
|||||||
#define HAS_ABGRTOYROW_RVV
|
#define HAS_ABGRTOYROW_RVV
|
||||||
#define HAS_ABGRTOYJROW_RVV
|
#define HAS_ABGRTOYJROW_RVV
|
||||||
#define HAS_BGRATOYROW_RVV
|
#define HAS_BGRATOYROW_RVV
|
||||||
|
#define HAS_I400TOARGBROW_RVV
|
||||||
#define HAS_I422ALPHATOARGBROW_RVV
|
#define HAS_I422ALPHATOARGBROW_RVV
|
||||||
#define HAS_I422TOARGBROW_RVV
|
#define HAS_I422TOARGBROW_RVV
|
||||||
#define HAS_I422TORGB24ROW_RVV
|
#define HAS_I422TORGB24ROW_RVV
|
||||||
@ -814,6 +815,7 @@ extern "C" {
|
|||||||
#define HAS_I444ALPHATOARGBROW_RVV
|
#define HAS_I444ALPHATOARGBROW_RVV
|
||||||
#define HAS_I444TOARGBROW_RVV
|
#define HAS_I444TOARGBROW_RVV
|
||||||
#define HAS_I444TORGB24ROW_RVV
|
#define HAS_I444TORGB24ROW_RVV
|
||||||
|
#define HAS_J400TOARGBROW_RVV
|
||||||
#define HAS_MERGEARGBROW_RVV
|
#define HAS_MERGEARGBROW_RVV
|
||||||
#define HAS_MERGERGBROW_RVV
|
#define HAS_MERGERGBROW_RVV
|
||||||
#define HAS_MERGEXRGBROW_RVV
|
#define HAS_MERGEXRGBROW_RVV
|
||||||
@ -3531,6 +3533,7 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
|||||||
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
|
void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -4460,6 +4463,10 @@ void I400ToARGBRow_LSX(const uint8_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
|
void I400ToARGBRow_RVV(const uint8_t* src_y,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width);
|
||||||
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* param,
|
const struct YuvConstants* param,
|
||||||
|
|||||||
@ -2897,6 +2897,11 @@ int I400ToARGBMatrix(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_I400TOARGBROW_RVV)
|
||||||
|
if (TestCpuFlag(kCpuHasRVV)) {
|
||||||
|
I400ToARGBRow = I400ToARGBRow_RVV;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
I400ToARGBRow(src_y, dst_argb, yuvconstants, width);
|
I400ToARGBRow(src_y, dst_argb, yuvconstants, width);
|
||||||
@ -2984,6 +2989,12 @@ int J400ToARGB(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_J400TOARGBROW_RVV)
|
||||||
|
if (TestCpuFlag(kCpuHasRVV)) {
|
||||||
|
J400ToARGBRow = J400ToARGBRow_RVV;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
J400ToARGBRow(src_y, dst_argb, width);
|
J400ToARGBRow(src_y, dst_argb, width);
|
||||||
src_y += src_stride_y;
|
src_y += src_stride_y;
|
||||||
|
|||||||
@ -474,6 +474,62 @@ void I422ToRGB24Row_RVV(const uint8_t* src_y,
|
|||||||
} while (w > 0);
|
} while (w > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void I400ToARGBRow_RVV(const uint8_t* src_y,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
const struct YuvConstants* yuvconstants,
|
||||||
|
int width) {
|
||||||
|
size_t w = (size_t)width;
|
||||||
|
size_t vl = __riscv_vsetvl_e8m2(w);
|
||||||
|
const bool is_yb_positive = (yuvconstants->kRGBCoeffBias[4] >= 0);
|
||||||
|
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
|
||||||
|
vuint16m4_t v_yb;
|
||||||
|
vuint16m4_t v_yg = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[0], vl);
|
||||||
|
// To match behavior on other platforms, vxrm (fixed-point rounding mode
|
||||||
|
// register) sets to round-down mode(2).
|
||||||
|
asm volatile("csrwi vxrm, 2");
|
||||||
|
if (is_yb_positive) {
|
||||||
|
v_yb = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[4], vl);
|
||||||
|
} else {
|
||||||
|
v_yb = __riscv_vmv_v_x_u16m4(-yuvconstants->kRGBCoeffBias[4], vl);
|
||||||
|
}
|
||||||
|
do {
|
||||||
|
vuint8m2_t v_y, v_out;
|
||||||
|
vuint16m4_t v_y_16, v_tmp0, v_tmp1, v_tmp2;
|
||||||
|
vuint32m8_t v_y1;
|
||||||
|
vl = __riscv_vsetvl_e8m2(w);
|
||||||
|
v_y = __riscv_vle8_v_u8m2(src_y, vl);
|
||||||
|
v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);
|
||||||
|
v_tmp0 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); // 257 * v_y
|
||||||
|
v_y1 = __riscv_vwmulu_vv_u32m8(v_tmp0, v_yg, vl);
|
||||||
|
v_tmp1 = __riscv_vnsrl_wx_u16m4(v_y1, 16, vl);
|
||||||
|
if (is_yb_positive) {
|
||||||
|
v_tmp2 = __riscv_vsaddu_vv_u16m4(v_tmp1, v_yb, vl);
|
||||||
|
} else {
|
||||||
|
v_tmp2 = __riscv_vssubu_vv_u16m4(v_tmp1, v_yb, vl);
|
||||||
|
}
|
||||||
|
v_out = __riscv_vnclipu_wx_u8m2(v_tmp2, 6, vl);
|
||||||
|
__riscv_vsseg4e8_v_u8m2(dst_argb, v_out, v_out, v_out, v_a, vl);
|
||||||
|
w -= vl;
|
||||||
|
src_y += vl;
|
||||||
|
dst_argb += vl * 4;
|
||||||
|
} while (w > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||||
|
size_t w = (size_t)width;
|
||||||
|
size_t vl = __riscv_vsetvl_e8m2(w);
|
||||||
|
vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
|
||||||
|
do {
|
||||||
|
vuint8m2_t v_y;
|
||||||
|
v_y = __riscv_vle8_v_u8m2(src_y, vl);
|
||||||
|
__riscv_vsseg4e8_v_u8m2(dst_argb, v_y, v_y, v_y, v_a, vl);
|
||||||
|
w -= vl;
|
||||||
|
src_y += vl;
|
||||||
|
dst_argb += vl * 4;
|
||||||
|
vl = __riscv_vsetvl_e8m2(w);
|
||||||
|
} while (w > 0);
|
||||||
|
}
|
||||||
|
|
||||||
void SplitRGBRow_RVV(const uint8_t* src_rgb,
|
void SplitRGBRow_RVV(const uint8_t* src_rgb,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user