mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
[RVV] Update ARGBAttenuateRow_RVV implementation
Bug: libyuv:956
Change-Id: Ib539c2196767e88fa6e419ed2f22d95b6deaf406
Signed-off-by: Bruce Lai <bruce.lai@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4623172
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
7939e039e7
commit
4472b5b849
@ -924,24 +924,31 @@ void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) {
|
|||||||
RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
|
RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Attenuate: (f * a + 255) >> 8
|
||||||
void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
|
void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
size_t w = (size_t)width;
|
size_t w = (size_t)width;
|
||||||
// To match behavior on other platforms, vxrm (fixed-point rounding mode
|
// To match behavior on other platforms, vxrm (fixed-point rounding mode
|
||||||
// register) is set to round-to-nearest-up(0).
|
// register) is set to round-down(2).
|
||||||
asm volatile("csrwi vxrm, 0");
|
asm volatile("csrwi vxrm, 2");
|
||||||
do {
|
do {
|
||||||
vuint8m2_t v_b, v_g, v_r, v_a;
|
vuint8m2_t v_b, v_g, v_r, v_a;
|
||||||
vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
|
vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
|
||||||
size_t vl = __riscv_vsetvl_e8m2(w);
|
size_t vl = __riscv_vsetvl_e8m2(w);
|
||||||
__riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
|
__riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
|
||||||
|
// f * a
|
||||||
v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
|
v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
|
||||||
v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
|
v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
|
||||||
v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
|
v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
|
||||||
v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl);
|
// f * a + 255
|
||||||
v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl);
|
v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl);
|
||||||
v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl);
|
v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl);
|
||||||
|
v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl);
|
||||||
|
// (f * a + 255) >> 8
|
||||||
|
v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl);
|
||||||
|
v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl);
|
||||||
|
v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl);
|
||||||
__riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
|
__riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
|
||||||
w -= vl;
|
w -= vl;
|
||||||
src_argb += vl * 4;
|
src_argb += vl * 4;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user