[RVV] Support AR64ToAB64 and RGBA-family color conversions

Add scalar code for AR64ToAB64, ARGBToRGBA, ARGBToBGRA, ARGBToABGR, RGBAToARGB, BGRAToARGB, and ABGRToARGB.
They were originally implemented via ARGBShuffle (AR64Shuffle for AR64ToAB64).
This CL implements them independently and enables the new implementations only for RISC-V for now.
This CL also adds RVV implementations for `RGBA-family <-> RGBA-family` color conversions.

* Run on SiFive internal FPGA (VLEN=128):

Test Case	Speedup
AR64ToAB64_Opt  x4.6
ARGBToRGBA_Opt  x6
ARGBToBGRA_Opt  x6
ARGBToABGR_Opt  x6
RGBAToARGB_Opt  x6

Change-Id: Ie0630901046084aa259699fcdeccc64170d7103f
Signed-off-by: Bruce Lai <bruce.lai@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4797451
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Bruce Lai 2023-08-18 01:50:49 -07:00 committed by Frank Barchard
parent f0921806a2
commit ec2e9ca000
5 changed files with 403 additions and 0 deletions

View File

@ -800,14 +800,18 @@ extern "C" {
#define HAS_ABGRTOYJROW_RVV
#define HAS_ABGRTOYROW_RVV
#define HAS_AR64TOARGBROW_RVV
#define HAS_AR64TOAB64ROW_RVV
#define HAS_ARGBATTENUATEROW_RVV
#define HAS_ARGBBLENDROW_RVV
#define HAS_ARGBCOPYYTOALPHAROW_RVV
#define HAS_ARGBEXTRACTALPHAROW_RVV
#define HAS_ARGBTOAB64ROW_RVV
#define HAS_ARGBTOABGRROW_RVV
#define HAS_ARGBTOAR64ROW_RVV
#define HAS_ARGBTOBGRAROW_RVV
#define HAS_ARGBTORAWROW_RVV
#define HAS_ARGBTORGB24ROW_RVV
#define HAS_ARGBTORGBAROW_RVV
#define HAS_ARGBTOYJROW_RVV
#define HAS_ARGBTOYMATRIXROW_RVV
#define HAS_ARGBTOYROW_RVV
@ -839,6 +843,7 @@ extern "C" {
#define HAS_RGB24TOARGBROW_RVV
#define HAS_RGB24TOYJROW_RVV
#define HAS_RGB24TOYROW_RVV
#define HAS_RGBATOARGBROW_RVV
#define HAS_RGBATOYJROW_RVV
#define HAS_RGBATOYMATRIXROW_RVV
#define HAS_RGBATOYROW_RVV
@ -3494,8 +3499,13 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
int width);
// Row converters that read ARGB. Each destination parameter is named after
// the format the function writes, matching the parameter names used by the
// corresponding definitions.
void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width);
void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width);
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@ -3509,6 +3519,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
// Scalar (_C) row converters. Rows in the AR64/AB64 formats are passed as
// uint16_t pointers; ARGB and RGBA rows are passed as uint8_t pointers.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
const uint8_t* shuffler,
@ -3537,6 +3549,8 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
// RVV row converters. Each has the same parameter list as the _C function
// with the same base name.
void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);

View File

@ -3003,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y,
return 0;
}
#ifndef __riscv
// Shuffle table for converting BGRA to ARGB.
static const uvec8 kShuffleMaskBGRAToARGB = {
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
@ -3090,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64,
return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
(const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
}
#else
// Convert BGRA to ARGB.
// The ARGB -> BGRA row conversion reverses the four bytes of every pixel, and
// reversing twice restores the original order, so the same routine serves
// both directions. All arguments are forwarded unchanged and the callee's
// status code is returned as-is.
LIBYUV_API
int BGRAToARGB(const uint8_t* src_bgra,
               int src_stride_bgra,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  const int status = ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb,
                                dst_stride_argb, width, height);
  return status;
}
// Convert ARGB to BGRA.
// Returns 0 on success, or -1 if either pointer is NULL, width is not
// positive, or height is zero. A negative height makes the source rows be
// read from the last row upwards, which flips the image vertically.
LIBYUV_API
int ARGBToBGRA(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_bgra,
               int dst_stride_bgra,
               int width,
               int height) {
  void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) =
      ARGBToBGRARow_C;
  int rows_left;
  if (width <= 0 || height == 0 || !src_argb || !dst_bgra) {
    return -1;
  }
  if (height < 0) {
    // Start at the last source row and walk backwards.
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // If both images have no padding between rows, process everything as a
  // single long row.
  if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = 0;
    dst_stride_bgra = 0;
  }
#if defined(HAS_ARGBTOBGRAROW_RVV)
  // Prefer the vector row function when the CPU reports RVV.
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBToBGRARow = ARGBToBGRARow_RVV;
  }
#endif
  for (rows_left = height; rows_left > 0; --rows_left) {
    ARGBToBGRARow(src_argb, dst_bgra, width);
    src_argb += src_stride_argb;
    dst_bgra += dst_stride_bgra;
  }
  return 0;
}
// Convert ARGB to ABGR.
// Returns 0 on success, or -1 if either pointer is NULL, width is not
// positive, or height is zero. A negative height makes the source rows be
// read from the last row upwards, which flips the image vertically.
LIBYUV_API
int ARGBToABGR(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_abgr,
               int dst_stride_abgr,
               int width,
               int height) {
  void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) =
      ARGBToABGRRow_C;
  int rows_left;
  if (width <= 0 || height == 0 || !src_argb || !dst_abgr) {
    return -1;
  }
  if (height < 0) {
    // Start at the last source row and walk backwards.
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // If both images have no padding between rows, process everything as a
  // single long row.
  if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = 0;
    dst_stride_abgr = 0;
  }
#if defined(HAS_ARGBTOABGRROW_RVV)
  // Prefer the vector row function when the CPU reports RVV.
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBToABGRRow = ARGBToABGRRow_RVV;
  }
#endif
  for (rows_left = height; rows_left > 0; --rows_left) {
    ARGBToABGRRow(src_argb, dst_abgr, width);
    src_argb += src_stride_argb;
    dst_abgr += dst_stride_abgr;
  }
  return 0;
}
// Convert ABGR to ARGB.
// The ARGB -> ABGR row conversion exchanges the first and third byte of every
// pixel; doing that twice restores the original order, so the same routine
// serves both directions. All arguments are forwarded unchanged and the
// callee's status code is returned as-is.
LIBYUV_API
int ABGRToARGB(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  const int status = ARGBToABGR(src_abgr, src_stride_abgr, dst_argb,
                                dst_stride_argb, width, height);
  return status;
}
// Convert RGBA to ARGB.
// Returns 0 on success, or -1 if either pointer is NULL, width is not
// positive, or height is zero. A negative height makes the source rows be
// read from the last row upwards, which flips the image vertically.
LIBYUV_API
int RGBAToARGB(const uint8_t* src_rgba,
               int src_stride_rgba,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) =
      RGBAToARGBRow_C;
  int rows_left;
  if (width <= 0 || height == 0 || !src_rgba || !dst_argb) {
    return -1;
  }
  if (height < 0) {
    // Start at the last source row and walk backwards.
    height = -height;
    src_rgba += (height - 1) * src_stride_rgba;
    src_stride_rgba = -src_stride_rgba;
  }
  // If both images have no padding between rows, process everything as a
  // single long row.
  if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_rgba = 0;
    dst_stride_argb = 0;
  }
#if defined(HAS_RGBATOARGBROW_RVV)
  // Prefer the vector row function when the CPU reports RVV.
  if (TestCpuFlag(kCpuHasRVV)) {
    RGBAToARGBRow = RGBAToARGBRow_RVV;
  }
#endif
  for (rows_left = height; rows_left > 0; --rows_left) {
    RGBAToARGBRow(src_rgba, dst_argb, width);
    src_rgba += src_stride_rgba;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert AR64 to AB64.
// Returns 0 on success, or -1 if either pointer is NULL, width is not
// positive, or height is zero. A negative height makes the source rows be
// read from the last row upwards, which flips the image vertically.
// Both strides are added to uint16_t pointers, so they are counted in
// uint16_t elements rather than bytes.
LIBYUV_API
int AR64ToAB64(const uint16_t* src_ar64,
               int src_stride_ar64,
               uint16_t* dst_ab64,
               int dst_stride_ab64,
               int width,
               int height) {
  void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64,
                        int width) = AR64ToAB64Row_C;
  int rows_left;
  if (width <= 0 || height == 0 || !src_ar64 || !dst_ab64) {
    return -1;
  }
  if (height < 0) {
    // Start at the last source row and walk backwards.
    height = -height;
    src_ar64 += (height - 1) * src_stride_ar64;
    src_stride_ar64 = -src_stride_ar64;
  }
  // A row holds width * 4 uint16_t values. If neither image has padding
  // between rows, process everything as a single long row.
  if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_ar64 = 0;
    dst_stride_ab64 = 0;
  }
#if defined(HAS_AR64TOAB64ROW_RVV)
  // Prefer the vector row function when the CPU reports RVV.
  if (TestCpuFlag(kCpuHasRVV)) {
    AR64ToAB64Row = AR64ToAB64Row_RVV;
  }
#endif
  for (rows_left = height; rows_left > 0; --rows_left) {
    AR64ToAB64Row(src_ar64, dst_ab64, width);
    src_ar64 += src_stride_ar64;
    dst_ab64 += dst_stride_ab64;
  }
  return 0;
}
#endif
// Convert RGB24 to ARGB.
LIBYUV_API

View File

@ -1527,6 +1527,7 @@ int ARGBToI400(const uint8_t* src_argb,
return 0;
}
#ifndef __riscv
// Shuffle table for converting ARGB to RGBA.
static const uvec8 kShuffleMaskARGBToRGBA = {
3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
@ -1542,6 +1543,47 @@ int ARGBToRGBA(const uint8_t* src_argb,
return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
(const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
}
#else
// Convert ARGB to RGBA.
// Returns 0 on success, or -1 if either pointer is NULL, width is not
// positive, or height is zero. A negative height makes the source rows be
// read from the last row upwards, which flips the image vertically.
LIBYUV_API
int ARGBToRGBA(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_rgba,
               int dst_stride_rgba,
               int width,
               int height) {
  void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) =
      ARGBToRGBARow_C;
  int rows_left;
  if (width <= 0 || height == 0 || !src_argb || !dst_rgba) {
    return -1;
  }
  if (height < 0) {
    // Start at the last source row and walk backwards.
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // If both images have no padding between rows, process everything as a
  // single long row.
  if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = 0;
    dst_stride_rgba = 0;
  }
#if defined(HAS_ARGBTORGBAROW_RVV)
  // Prefer the vector row function when the CPU reports RVV.
  if (TestCpuFlag(kCpuHasRVV)) {
    ARGBToRGBARow = ARGBToRGBARow_RVV;
  }
#endif
  for (rows_left = height; rows_left > 0; --rows_left) {
    ARGBToRGBARow(src_argb, dst_rgba, width);
    src_argb += src_stride_argb;
    dst_rgba += dst_stride_rgba;
  }
  return 0;
}
#endif
// Convert ARGB To RGB24.
LIBYUV_API

View File

@ -281,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
}
}
// Converts `width` 4-byte pixels from ARGB to ABGR by exchanging the first
// and third byte of each pixel; the second and fourth bytes stay in place.
// Does nothing when width <= 0.
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before writing so that converting a buffer onto
    // itself gives the same result as converting into a separate buffer.
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    const uint8_t alpha = src_argb[3];
    src_argb += 4;
    dst_abgr[0] = red;
    dst_abgr[1] = green;
    dst_abgr[2] = blue;
    dst_abgr[3] = alpha;
    dst_abgr += 4;
  }
}
// Converts `width` 4-byte pixels from ARGB to BGRA by reversing the byte
// order within each pixel. Does nothing when width <= 0.
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before writing so that converting a buffer onto
    // itself gives the same result as converting into a separate buffer.
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    const uint8_t alpha = src_argb[3];
    src_argb += 4;
    dst_bgra[0] = alpha;
    dst_bgra[1] = red;
    dst_bgra[2] = green;
    dst_bgra[3] = blue;
    dst_bgra += 4;
  }
}
// Converts `width` 4-byte pixels from ARGB to RGBA: the fourth source byte
// (alpha) moves to the front and the other three bytes shift back by one.
// Does nothing when width <= 0.
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before writing so that converting a buffer onto
    // itself gives the same result as converting into a separate buffer.
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    const uint8_t alpha = src_argb[3];
    src_argb += 4;
    dst_rgba[0] = alpha;
    dst_rgba[1] = blue;
    dst_rgba[2] = green;
    dst_rgba[3] = red;
    dst_rgba += 4;
  }
}
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width; ++x) {
@ -309,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
}
}
// Converts `width` 4-byte pixels from RGBA to ARGB: the first source byte
// (alpha) moves to the end and the other three bytes shift forward by one.
// Does nothing when width <= 0.
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before writing so that converting a buffer onto
    // itself gives the same result as converting into a separate buffer.
    const uint8_t alpha = src_rgba[0];
    const uint8_t blue = src_rgba[1];
    const uint8_t green = src_rgba[2];
    const uint8_t red = src_rgba[3];
    src_rgba += 4;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    dst_argb += 4;
  }
}
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@ -517,6 +581,22 @@ void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
}
}
// Converts `width` pixels of four uint16_t channels each from AR64 to AB64 by
// exchanging the first and third channel; the second and fourth channels stay
// in place. Does nothing when width <= 0.
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before writing so that converting a buffer onto
    // itself gives the same result as converting into a separate buffer.
    const uint16_t blue = src_ar64[0];
    const uint16_t green = src_ar64[1];
    const uint16_t red = src_ar64[2];
    const uint16_t alpha = src_ar64[3];
    src_ar64 += 4;
    dst_ab64[0] = red;
    dst_ab64[1] = green;
    dst_ab64[2] = blue;
    dst_ab64[3] = alpha;
    dst_ab64 += 4;
  }
}
// TODO(fbarchard): Make shuffle compatible with SIMD versions
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,

View File

@ -200,6 +200,23 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
}
#endif
#ifdef HAS_AR64TOAB64ROW_RVV
// RVV version of the AR64 -> AB64 row conversion: exchanges the first and
// third 16-bit channel of every 4-channel pixel.
// The loop body always runs at least once and `width` is cast to size_t, so
// this relies on the caller passing width > 0.
void AR64ToAB64Row_RVV(const uint16_t* src_ar64,
                       uint16_t* dst_ab64,
                       int width) {
  size_t avl = (size_t)width;  // Pixels still to convert.
  do {
    vuint16m2_t v_b, v_g, v_r, v_a;
    const size_t vl = __riscv_vsetvl_e16m2(avl);
    // Segment load: split vl pixels into one register group per channel.
    __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl);
    // Segment store with the first and third channel groups exchanged.
    __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl);
    src_ar64 += 4 * vl;
    dst_ab64 += 4 * vl;
    avl -= vl;
  } while (avl > 0);
}
#endif
#ifdef HAS_AB64TOARGBROW_RVV
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
size_t avl = (size_t)width;
@ -301,6 +318,66 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
}
#endif
#ifdef HAS_ARGBTOABGRROW_RVV
// RVV version of the ARGB -> ABGR row conversion: exchanges the first and
// third byte of every 4-byte pixel.
// The loop body always runs at least once and `width` is cast to size_t, so
// this relies on the caller passing width > 0.
void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
  size_t avl = (size_t)width;  // Pixels still to convert.
  do {
    vuint8m2_t v_b, v_g, v_r, v_a;
    const size_t vl = __riscv_vsetvl_e8m2(avl);
    // Segment load: split vl pixels into one register group per channel.
    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
    // Segment store with the first and third channel groups exchanged.
    __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl);
    src_argb += 4 * vl;
    dst_abgr += 4 * vl;
    avl -= vl;
  } while (avl > 0);
}
#endif
#ifdef HAS_ARGBTOBGRAROW_RVV
// RVV version of the ARGB -> BGRA row conversion: reverses the byte order
// within every 4-byte pixel.
// The loop body always runs at least once and `width` is cast to size_t, so
// this relies on the caller passing width > 0.
void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
  size_t avl = (size_t)width;  // Pixels still to convert.
  do {
    vuint8m2_t v_b, v_g, v_r, v_a;
    const size_t vl = __riscv_vsetvl_e8m2(avl);
    // Segment load: split vl pixels into one register group per channel.
    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
    // Segment store with the channel groups in reverse order.
    __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl);
    src_argb += 4 * vl;
    dst_bgra += 4 * vl;
    avl -= vl;
  } while (avl > 0);
}
#endif
#ifdef HAS_ARGBTORGBAROW_RVV
// RVV version of the ARGB -> RGBA row conversion: the fourth byte of every
// 4-byte pixel moves to the front and the other three shift back by one.
// The loop body always runs at least once and `width` is cast to size_t, so
// this relies on the caller passing width > 0.
void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
  size_t avl = (size_t)width;  // Pixels still to convert.
  do {
    vuint8m2_t v_b, v_g, v_r, v_a;
    const size_t vl = __riscv_vsetvl_e8m2(avl);
    // Segment load: split vl pixels into one register group per channel.
    __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
    // Segment store with the last channel group written first.
    __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
    src_argb += 4 * vl;
    dst_rgba += 4 * vl;
    avl -= vl;
  } while (avl > 0);
}
#endif
#ifdef HAS_RGBATOARGBROW_RVV
// RVV version of the RGBA -> ARGB row conversion: the first byte of every
// 4-byte pixel moves to the end and the other three shift forward by one.
// The loop body always runs at least once and `width` is cast to size_t, so
// this relies on the caller passing width > 0.
void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
  size_t avl = (size_t)width;  // Pixels still to convert.
  do {
    vuint8m2_t v_b, v_g, v_r, v_a;
    const size_t vl = __riscv_vsetvl_e8m2(avl);
    // Segment load: split vl pixels into one register group per channel.
    __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);
    // Segment store with the first channel group written last.
    __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
    src_rgba += 4 * vl;
    dst_argb += 4 * vl;
    avl -= vl;
  } while (avl > 0);
}
#endif
#ifdef HAS_RGB24TOARGBROW_RVV
void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
uint8_t* dst_argb,