mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
[RVV] Enable Scale{ARGB,UV}RowDown{2,4,EVEN}_RVV
Run on SiFive internal FPGA:

Test case                RVV function                          Speedup
I444ScaleDownBy3_Box     ScaleAddRow_RVV+ScaleAddCols(scalar)  2.8
ARGBScaleDownBy2_None    ScaleARGBRowDown2_RVV                 2.2
ARGBScaleDownBy2_Linear  ScaleARGBRowDown2Linear_RVV           5.0
ARGBScaleDownBy2_Box     ScaleARGBRowDown2Box_RVV              4.3
ARGBScaleDownBy4_None    ScaleARGBRowDownEven_RVV              1.2
ARGBScaleDownBy8_Box     ScaleARGBRowDownEvenBox_RVV           3.2
ARGBScaleDownBy4_Box     ScaleARGBRowDown2Box_RVV              4.5
I444ScaleDownBy2_None    ScaleRowDown2_RVV                     5.8
I444ScaleDownBy2_Linear  ScaleRowDown2Linear_RVV               6.1
I444ScaleDownBy2_Box     ScaleRowDown2Box_RVV                  5.0
I444ScaleDownBy4_None    ScaleRowDown4_RVV                     3.6
I444ScaleDownBy4_Box     ScaleRowDown4Box_RVV                  3.5
UVScaleDownBy2_None      ScaleUVRowDown2_RVV                   5.8
UVScaleDownBy2_Linear    ScaleUVRowDown2Linear_RVV             5.6
UVScaleDownBy2_Box       ScaleUVRowDown2Box_RVV                4.1
UVScaleDownBy4_None      ScaleUVRowDown4_RVV                   1.7
UVScaleDownBy4_Box       ScaleUVRowDown2Box_RVV                4.5

avg-speedup: 4

Note: ScaleUVRowDownEven with step_size=4 is specialized as ScaleUVRowDown4_RVV.

Bug: libyuv:956
Change-Id: If9604a6aadf681193f282507602c57c726332202
Signed-off-by: Darren Hsieh <darren.hsieh@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4601684
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
29bcf021c6
commit
873eaa3bbf
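For context, these row kernels are not called directly; they sit behind libyuv's public scaling entry points. A minimal sketch of a call that exercises the ARGB 2:1 box path (assuming an RVV-enabled build of libyuv and its usual public headers; buffer sizes are illustrative only):

/* sketch.c - illustrative, not part of this commit */
#include <stdint.h>
#include <stdlib.h>
#include "libyuv/scale_argb.h"  // ARGBScale()

int main(void) {
  enum { kSrcW = 1280, kSrcH = 720, kDstW = kSrcW / 2, kDstH = kSrcH / 2 };
  uint8_t* src = calloc((size_t)kSrcW * kSrcH, 4);  // ARGB, 4 bytes/pixel
  uint8_t* dst = calloc((size_t)kDstW * kDstH, 4);
  // A 2:1 box-filtered scale; with the dispatch added in this commit, a CPU
  // reporting kCpuHasRVV routes the row work to ScaleARGBRowDown2Box_RVV.
  ARGBScale(src, kSrcW * 4, kSrcW, kSrcH, dst, kDstW * 4, kDstW, kDstH,
            kFilterBox);
  free(src);
  free(dst);
  return 0;
}

The same binary falls back to the C kernels on hardware without the vector extension, since every dispatch below still checks TestCpuFlag(kCpuHasRVV) at runtime.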
include/libyuv/scale_row.h
@@ -175,6 +175,18 @@ extern "C" {
 #define HAS_SCALEROWDOWN34_LSX
 #endif
 
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#define HAS_SCALEADDROW_RVV
+#define HAS_SCALEARGBROWDOWN2_RVV
+#define HAS_SCALEARGBROWDOWNEVEN_RVV
+#define HAS_SCALEROWDOWN2_RVV
+#define HAS_SCALEROWDOWN4_RVV
+#define HAS_SCALEUVROWDOWN2_RVV
+#define HAS_SCALEUVROWDOWN2LINEAR_RVV
+#define HAS_SCALEUVROWDOWN2BOX_RVV
+#define HAS_SCALEUVROWDOWNEVEN_RVV
+#endif
+
 // Scale ARGB vertically with bilinear interpolation.
 void ScalePlaneVertical(int src_height,
                         int dst_width,
@@ -949,6 +961,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint8_t* dst,
                                int dst_width);
+void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
+                           ptrdiff_t src_stride,
+                           uint8_t* dst_argb,
+                           int dst_width);
+void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
+                                 ptrdiff_t src_stride,
+                                 uint8_t* dst_argb,
+                                 int dst_width);
+void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
+                              ptrdiff_t src_stride,
+                              uint8_t* dst_argb,
+                              int dst_width);
 void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
@@ -1061,6 +1085,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
                                  int src_stepx,
                                  uint8_t* dst_argb,
                                  int dst_width);
+void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
+                              ptrdiff_t src_stride,
+                              int32_t src_stepx,
+                              uint8_t* dst_argb,
+                              int dst_width);
+void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
+                                 ptrdiff_t src_stride,
+                                 int src_stepx,
+                                 uint8_t* dst_argb,
+                                 int dst_width);
 void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
                                    ptrdiff_t src_stride,
                                    int src_stepx,
@@ -1143,6 +1177,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint8_t* dst_uv,
                             int dst_width);
+void ScaleUVRowDown2_RVV(const uint8_t* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8_t* dst_uv,
+                         int dst_width);
+void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8_t* dst_uv,
+                               int dst_width);
+void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr,
+                            ptrdiff_t src_stride,
+                            uint8_t* dst,
+                            int dst_width);
 void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint8_t* dst_ptr,
@@ -1203,6 +1249,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr,
                                 int src_stepx,
                                 uint8_t* dst_uv,
                                 int dst_width);
+void ScaleUVRowDown4_RVV(const uint8_t* src_ptr,
+                         ptrdiff_t src_stride,
+                         int32_t src_stepx,
+                         uint8_t* dst_uv,
+                         int dst_width);
+void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr,
+                            ptrdiff_t src_stride,
+                            int32_t src_stepx,
+                            uint8_t* dst_uv,
+                            int dst_width);
 void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             int32_t src_stepx,
@@ -1744,6 +1800,29 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr,
                                   uint8_t* dst_ptr,
                                   int dst_width);
+
+void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleRowDown2_RVV(const uint8_t* src_ptr,
+                       ptrdiff_t src_stride,
+                       uint8_t* dst,
+                       int dst_width);
+void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
+                             ptrdiff_t src_stride,
+                             uint8_t* dst,
+                             int dst_width);
+void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
+                          ptrdiff_t src_stride,
+                          uint8_t* dst,
+                          int dst_width);
+
+void ScaleRowDown4_RVV(const uint8_t* src_ptr,
+                       ptrdiff_t src_stride,
+                       uint8_t* dst_ptr,
+                       int dst_width);
+void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
+                          ptrdiff_t src_stride,
+                          uint8_t* dst_ptr,
+                          int dst_width);
 
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
source/rotate_argb.cc
@@ -69,6 +69,11 @@ static int ARGBTranspose(const uint8_t* src_argb,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
+  }
+#endif
 
   for (i = 0; i < width; ++i) {  // column of source to row of dest.
     ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
source/scale.cc
@@ -135,6 +135,14 @@ static void ScalePlaneDown2(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowDown2 = filtering == kFilterNone
+                        ? ScaleRowDown2_RVV
+                        : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
+                                                      : ScaleRowDown2Box_RVV);
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -312,6 +320,11 @@ static void ScalePlaneDown4(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEROWDOWN4_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -971,6 +984,11 @@ static void ScalePlaneBox(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEADDROW_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleAddRow = ScaleAddRow_RVV;
+  }
+#endif
 
   for (j = 0; j < dst_height; ++j) {
     int boxheight;
source/scale_argb.cc
@@ -127,6 +127,15 @@ static void ScaleARGBDown2(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDown2 =
+        filtering == kFilterNone
+            ? ScaleARGBRowDown2_RVV
+            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_RVV
+                                          : ScaleARGBRowDown2Box_RVV);
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
@@ -184,6 +193,11 @@ static void ScaleARGBDown4Box(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_RVV;
+  }
+#endif
 
   for (j = 0; j < dst_height; ++j) {
     ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
@@ -263,6 +277,12 @@ static void ScaleARGBDownEven(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleARGBRowDownEven =
+        filtering ? ScaleARGBRowDownEvenBox_RVV : ScaleARGBRowDownEven_RVV;
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
source/scale_rvv.cc (new file, 470 lines)
@@ -0,0 +1,470 @@
/*
 * Copyright 2023 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

/*
 * Copyright (c) 2023 SiFive, Inc. All rights reserved.
 *
 * Contributed by Darren Hsieh <darren.hsieh@sifive.com>
 * Contributed by Bruce Lai <bruce.lai@sifive.com>
 */

#include "libyuv/row.h"
#include "libyuv/scale_row.h"

// This module is for gcc/clang rvv.
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
#include <riscv_vector.h>

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  size_t w = (size_t)src_width;
  do {
    size_t vl = __riscv_vsetvl_e8m4(w);
    vuint8m4_t v_src = __riscv_vle8_v_u8m4(src_ptr, vl);
    vuint16m8_t v_dst = __riscv_vle16_v_u16m8(dst_ptr, vl);
    // Use widening multiply-add instead of widening + add
    v_dst = __riscv_vwmaccu_vx_u16m8(v_dst, 1, v_src, vl);
    __riscv_vse16_v_u16m8(dst_ptr, v_dst, vl);
    w -= vl;
    src_ptr += vl;
    dst_ptr += vl;
  } while (w > 0);
}

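// Annotation (not in the original commit): with the scalar multiplier fixed
// at 1, vwmaccu computes dst[i] += (uint16_t)src[i] in a single widening
// instruction; the loop above is the vector form of the scalar row
// accumulate, assumed to be
//   for (int x = 0; x < src_width; ++x)
//     dst_ptr[x] += src_ptr[x];
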
void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
                           ptrdiff_t src_stride,
                           uint8_t* dst_argb,
                           int dst_width) {
  (void)src_stride;
  size_t w = (size_t)dst_width;
  const uint64_t* src = (const uint64_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  do {
    vuint64m8_t v_data;
    vuint32m4_t v_dst;
    size_t vl = __riscv_vsetvl_e64m8(w);
    v_data = __riscv_vle64_v_u64m8(src, vl);
    v_dst = __riscv_vnsrl_wx_u32m4(v_data, 32, vl);
    __riscv_vse32_v_u32m4(dst, v_dst, vl);
    w -= vl;
    src += vl;
    dst += vl;
  } while (w > 0);
}

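// Annotation (not in the original commit): each 64-bit lane holds two ARGB
// pixels, so the narrowing shift by 32 keeps the upper word, i.e. the second
// pixel of every pair (dst[x] = src[2 * x + 1] on little-endian RISC-V), the
// usual unfiltered 2:1 selection.
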
void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst_argb,
                                 int dst_width) {
  (void)src_stride;
  size_t w = (size_t)dst_width;
  const uint32_t* src = (const uint32_t*)(src_argb);
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m4_t v_odd, v_even, v_dst;
    vuint16m8_t v_sum;
    vuint32m4_t v_odd_32, v_even_32;
    size_t vl = __riscv_vsetvl_e32m4(w);
    __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl);
    v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32);
    v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32);
    // Use round-to-nearest-up mode for averaging add
    v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4);
    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
    w -= vl;
    src += vl * 2;
    dst_argb += vl * 4;
  } while (w > 0);
}

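// Annotation (not in the original commit): with vxrm = 0
// (round-to-nearest-up), vaaddu yields (a + b + 1) >> 1, the same rounded
// average the other backends produce; vl * 4 converts the per-pixel count
// into the per-byte count the 8-bit averaging op operates on.
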
void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              uint8_t* dst_argb,
                              int dst_width) {
  size_t w = (size_t)dst_width;
  const uint32_t* src0 = (const uint32_t*)(src_argb);
  const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst;
    vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16;
    vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32;
    size_t vl = __riscv_vsetvl_e32m4(w);
    __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl);
    __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl);
    v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32);
    v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32);
    v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32);
    v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32);
    v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4);
    v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4);
    v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
    // Use round-to-nearest-up mode for vnclip
    v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4);
    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
    w -= vl;
    src0 += vl * 2;
    src1 += vl * 2;
    dst_argb += vl * 4;
  } while (w > 0);
}

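// Annotation (not in the original commit): the two widening adds plus the
// 16-bit add build a 4-sample box sum per byte lane, and vnclipu with a
// shift of 2 under round-to-nearest-up gives (sum + 2) >> 2, the
// conventional rounded 2x2 box average.
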
void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              int src_stepx,
                              uint8_t* dst_argb,
                              int dst_width) {
  size_t w = (size_t)dst_width;
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  const int stride_byte = src_stepx * 4;
  do {
    vuint32m8_t v_row;
    size_t vl = __riscv_vsetvl_e32m8(w);
    v_row = __riscv_vlse32_v_u32m8(src, stride_byte, vl);
    __riscv_vse32_v_u32m8(dst, v_row, vl);
    w -= vl;
    src += vl * src_stepx;
    dst += vl;
  } while (w > 0);
}

void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 int src_stepx,
                                 uint8_t* dst_argb,
                                 int dst_width) {
  size_t w = (size_t)dst_width;
  const uint32_t* src0 = (const uint32_t*)(src_argb);
  const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
  const int stride_byte = src_stepx * 4;
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst;
    vuint16m8_t v_row0_sum, v_row1_sum, v_sum;
    vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32;
    size_t vl = __riscv_vsetvl_e32m4(w);
    __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0,
                               stride_byte, vl);
    __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1,
                               stride_byte, vl);
    v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32);
    v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32);
    v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32);
    v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32);
    v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4);
    v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4);
    v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
    // Use round-to-nearest-up mode for vnclip
    v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4);
    __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
    w -= vl;
    src0 += vl * src_stepx;
    src1 += vl * src_stepx;
    dst_argb += vl * 4;
  } while (w > 0);
}

void ScaleRowDown2_RVV(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst,
                       int dst_width) {
  size_t w = (size_t)dst_width;
  const uint16_t* src = (const uint16_t*)src_ptr;
  (void)src_stride;
  do {
    size_t vl = __riscv_vsetvl_e16m8(w);
    vuint16m8_t v_src = __riscv_vle16_v_u16m8(src, vl);
    vuint8m4_t v_dst = __riscv_vnsrl_wx_u8m4(v_src, 8, vl);
    __riscv_vse8_v_u8m4(dst, v_dst, vl);
    w -= vl;
    src += vl;
    dst += vl;
  } while (w > 0);
}

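// Annotation (not in the original commit): reading the row as uint16_t pairs
// and narrowing with a shift of 8 keeps the high byte of each pair, i.e. the
// odd source pixel (dst[x] = src[2 * x + 1]), matching the scalar
// ScaleRowDown2_C selection.
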
void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint8_t* dst,
                             int dst_width) {
  size_t w = (size_t)dst_width;
  (void)src_stride;
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m4_t v_s0, v_s1, v_dst;
    size_t vl = __riscv_vsetvl_e8m4(w);
    __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl);
    // Use round-to-nearest-up mode for averaging add
    v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl);
    __riscv_vse8_v_u8m4(dst, v_dst, vl);
    w -= vl;
    src_ptr += 2 * vl;
    dst += vl;
  } while (w > 0);
}

void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst,
                          int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  size_t w = (size_t)dst_width;
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    size_t vl = __riscv_vsetvl_e8m4(w);
    vuint8m4_t v_s0, v_s1, v_t0, v_t1;
    vuint16m8_t v_s01, v_t01, v_st01;
    vuint8m4_t v_dst;
    __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl);
    __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl);
    v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl);
    v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl);
    v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl);
    // Use round-to-nearest-up mode for vnclip
    v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl);
    __riscv_vse8_v_u8m4(dst, v_dst, vl);
    w -= vl;
    s += 2 * vl;
    t += 2 * vl;
    dst += vl;
  } while (w > 0);
}

void ScaleRowDown4_RVV(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst_ptr,
                       int dst_width) {
  size_t w = (size_t)dst_width;
  (void)src_stride;
  do {
    size_t vl = __riscv_vsetvl_e8m2(w);
    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
    __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl);
    w -= vl;
    src_ptr += (4 * vl);
    dst_ptr += vl;
  } while (w > 0);
}

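// Annotation (not in the original commit): of the four deinterleaved lanes
// only v_s2 is stored, i.e. dst[x] = src[4 * x + 2], the same middle sample
// the scalar 4:1 point-sampling path keeps.
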
void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst_ptr,
                          int dst_width) {
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
  const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
  size_t w = (size_t)dst_width;
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m2_t v_s0, v_s1, v_s2, v_s3;
    vuint8m2_t v_t0, v_t1, v_t2, v_t3;
    vuint8m2_t v_u0, v_u1, v_u2, v_u3;
    vuint8m2_t v_v0, v_v1, v_v2, v_v3;
    vuint16m4_t v_s01, v_s23, v_t01, v_t23;
    vuint16m4_t v_u01, v_u23, v_v01, v_v23;
    vuint16m4_t v_st01, v_st23, v_uv01, v_uv23;
    vuint16m4_t v_st0123, v_uv0123, v_stuv0123;
    vuint8m2_t v_dst;
    size_t vl = __riscv_vsetvl_e8m2(w);

    __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
    v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl);

    __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl);
    v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl);

    __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl);
    v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl);
    v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl);

    v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl);
    v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl);
    v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl);
    v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl);

    __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl);

    v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl);
    v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl);

    v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl);
    v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl);

    v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl);
    v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl);
    v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl);
    // Use round-to-nearest-up mode for vnclip
    v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl);
    __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl);
    w -= vl;
    src_ptr += 4 * vl;
    src_ptr1 += 4 * vl;
    src_ptr2 += 4 * vl;
    src_ptr3 += 4 * vl;
    dst_ptr += vl;
  } while (w > 0);
}

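// Annotation (not in the original commit): the adds above accumulate a 4x4
// box (16 samples) into 16-bit lanes; vnclipu with a shift of 4 under
// round-to-nearest-up then gives (sum + 8) >> 4, a rounded 16-sample mean.
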
void ScaleUVRowDown2_RVV(const uint8_t* src_uv,
                         ptrdiff_t src_stride,
                         uint8_t* dst_uv,
                         int dst_width) {
  size_t w = (size_t)dst_width;
  const uint32_t* src = (const uint32_t*)src_uv;
  uint16_t* dst = (uint16_t*)dst_uv;
  (void)src_stride;
  do {
    size_t vl = __riscv_vsetvl_e32m8(w);
    vuint32m8_t v_data = __riscv_vle32_v_u32m8(src, vl);
    vuint16m4_t v_u1v1 = __riscv_vnsrl_wx_u16m4(v_data, 16, vl);
    __riscv_vse16_v_u16m4(dst, v_u1v1, vl);
    w -= vl;
    src += vl;
    dst += vl;
  } while (w > 0);
}

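// Annotation (not in the original commit): each 32-bit lane packs two
// interleaved UV pairs, so the narrowing shift by 16 keeps the second pair,
// i.e. UV[2 * x + 1], mirroring the unfiltered planar case above.
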
void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,
                               ptrdiff_t src_stride,
                               uint8_t* dst_uv,
                               int dst_width) {
  size_t w = (size_t)dst_width;
  const uint16_t* src = (const uint16_t*)src_uv;
  (void)src_stride;
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m4_t v_u0v0, v_u1v1, v_avg;
    vuint16m4_t v_u0v0_16, v_u1v1_16;
    size_t vl = __riscv_vsetvl_e16m4(w);
    __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl);
    v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16);
    v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16);
    // Use round-to-nearest-up mode for averaging add
    v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2);
    __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2);
    w -= vl;
    src += vl * 2;
    dst_uv += vl * 2;
  } while (w > 0);
}

void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,
                            ptrdiff_t src_stride,
                            uint8_t* dst_uv,
                            int dst_width) {
  const uint8_t* src_uv_row1 = src_uv + src_stride;
  size_t w = (size_t)dst_width;
  // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding
  // mode register) is set to round-to-nearest-up mode(0).
  asm volatile("csrwi vxrm, 0");
  do {
    vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0;
    vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1;
    vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1;
    vuint16m4_t v_sum0, v_sum1;
    vuint8m2_t v_dst_u, v_dst_v;
    size_t vl = __riscv_vsetvl_e8m2(w);

    __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0,
                            src_uv, vl);
    __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1,
                            src_uv_row1, vl);

    v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl);
    v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl);
    v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl);
    v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl);

    v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl);
    v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl);
    // Use round-to-nearest-up mode for vnclip
    v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl);
    v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl);

    __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl);

    dst_uv += 2 * vl;
    src_uv += 4 * vl;
    w -= vl;
    src_uv_row1 += 4 * vl;
  } while (w > 0);
}

void ScaleUVRowDown4_RVV(const uint8_t* src_uv,
                         ptrdiff_t src_stride,
                         int src_stepx,
                         uint8_t* dst_uv,
                         int dst_width) {
  // Overflow will never happen here, since sizeof(size_t)/sizeof(int)=2.
  // dst_width = src_width / 4 and src_width is also int.
  size_t w = (size_t)dst_width * 8;
  (void)src_stride;
  (void)src_stepx;
  do {
    size_t vl = __riscv_vsetvl_e8m8(w);
    vuint8m8_t v_row = __riscv_vle8_v_u8m8(src_uv, vl);
    vuint64m8_t v_row_64 = __riscv_vreinterpret_v_u8m8_u64m8(v_row);
    // Narrowing without clipping
    vuint32m4_t v_tmp = __riscv_vncvt_x_x_w_u32m4(v_row_64, vl / 8);
    vuint16m2_t v_dst_16 = __riscv_vncvt_x_x_w_u16m2(v_tmp, vl / 8);
    vuint8m2_t v_dst = __riscv_vreinterpret_v_u16m2_u8m2(v_dst_16);
    __riscv_vse8_v_u8m2(dst_uv, v_dst, vl / 4);
    w -= vl;
    src_uv += vl;
    dst_uv += vl / 4;
  } while (w > 0);
}

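// Annotation (not in the original commit): vl here counts bytes, and each
// output UV pair consumes 8 input bytes (4 pixels x 2 bytes), hence
// w = dst_width * 8 and the vl / 8 element counts; truncating every 64-bit
// group down to 16 bits keeps its lowest-addressed UV pair, i.e. pixel 0 of
// each group of four.
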
void ScaleUVRowDownEven_RVV(const uint8_t* src_uv,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_uv,
                            int dst_width) {
  size_t w = (size_t)dst_width;
  const ptrdiff_t stride_byte = (ptrdiff_t)src_stepx * 2;
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  (void)src_stride;
  do {
    size_t vl = __riscv_vsetvl_e16m8(w);
    vuint16m8_t v_row = __riscv_vlse16_v_u16m8(src, stride_byte, vl);
    __riscv_vse16_v_u16m8(dst, v_row, vl);
    w -= vl;
    src += vl * src_stepx;
    dst += vl;
  } while (w > 0);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
source/scale_uv.cc
@@ -128,6 +128,15 @@ static void ScaleUVDown2(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEUVROWDOWN2_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleUVRowDown2 =
+        filtering == kFilterNone
+            ? ScaleUVRowDown2_RVV
+            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_RVV
+                                          : ScaleUVRowDown2Box_RVV);
+  }
+#endif
 
 // This code is not enabled. Only box filter is available at this time.
 #if defined(HAS_SCALEUVROWDOWN2_SSSE3)
@@ -231,6 +240,11 @@ static void ScaleUVDown4Box(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEUVROWDOWN2BOX_RVV)
+  if (TestCpuFlag(kCpuHasRVV)) {
+    ScaleUVRowDown2 = ScaleUVRowDown2Box_RVV;
+  }
+#endif
 
   for (j = 0; j < dst_height; ++j) {
     ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
@@ -310,6 +324,12 @@ static void ScaleUVDownEven(int src_width,
     }
   }
 #endif
+#if defined(HAS_SCALEUVROWDOWNEVEN_RVV)
+  if (TestCpuFlag(kCpuHasRVV) && !filtering) {
+    ScaleUVRowDownEven =
+        (col_step == 4) ? ScaleUVRowDown4_RVV : ScaleUVRowDownEven_RVV;
+  }
+#endif
 
   if (filtering == kFilterLinear) {
     src_stride = 0;
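A matching sketch for the UV path: a 4:1 unfiltered scale is exactly the col_step == 4 case the dispatch above specializes to ScaleUVRowDown4_RVV (again assuming libyuv's UVScale entry point and an RVV-enabled build; sizes are illustrative):

/* uv_sketch.c - illustrative, not part of this commit */
#include <stdint.h>
#include <stdlib.h>
#include "libyuv/scale_uv.h"  // UVScale()

int main(void) {
  enum { kSrcW = 640, kSrcH = 360, kDstW = kSrcW / 4, kDstH = kSrcH / 4 };
  uint8_t* src_uv = calloc((size_t)kSrcW * kSrcH, 2);  // interleaved U,V
  uint8_t* dst_uv = calloc((size_t)kDstW * kDstH, 2);
  // kFilterNone with a 4x width reduction takes the ScaleUVRowDown4_RVV path.
  UVScale(src_uv, kSrcW * 2, kSrcW, kSrcH, dst_uv, kDstW * 2, kDstW, kDstH,
          kFilterNone);
  free(src_uv);
  free(dst_uv);
  return 0;
}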