mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Add MSA-optimized ARGBToRGB24Row_MSA and ARGBToRAWRow_MSA functions
R=fbarchard@google.com BUG=libyuv:634 Review URL: https://codereview.chromium.org/2487913004 .
This commit is contained in:
parent
97fb18b846
commit
b1504a8e48
@ -384,6 +384,8 @@ extern "C" {
|
||||
#define HAS_I422TORGBAROW_MSA
|
||||
#define HAS_I422ALPHATOARGBROW_MSA
|
||||
#define HAS_I422TORGB24ROW_MSA
|
||||
#define HAS_ARGBTORGB24ROW_MSA
|
||||
#define HAS_ARGBTORAWROW_MSA
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||
@ -1304,6 +1306,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_rgb,
|
||||
const uint32 dither4,
|
||||
int width);
|
||||
void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
|
||||
void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
@ -1892,6 +1896,8 @@ void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb,
|
||||
uint8* dst_rgb,
|
||||
const uint32 dither4,
|
||||
int width);
|
||||
void ARGBToRGB24Row_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
void ARGBToRAWRow_Any_MSA(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
|
||||
void I444ToARGBRow_Any_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
|
||||
@ -841,6 +841,14 @@ int ARGBToRGB24(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToRGB24Row(src_argb, dst_rgb24, width);
|
||||
@ -891,6 +899,14 @@ int ARGBToRAW(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORAWROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGBToRAWRow = ARGBToRAWRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToRAWRow = ARGBToRAWRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToRAWRow(src_argb, dst_raw, width);
|
||||
|
||||
@ -380,6 +380,10 @@ ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_MSA)
|
||||
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
|
||||
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
|
||||
#endif
|
||||
#if defined(HAS_RAWTORGB24ROW_NEON)
|
||||
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
|
||||
#endif
|
||||
|
||||
@ -229,12 +229,12 @@ void I422ToARGBRow_MSA(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void YUVTORGBARow_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
void I422ToRGBARow_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
@ -289,12 +289,12 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void YUVTORGB24Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int32 width) {
|
||||
void I422ToRGB24Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int32 width) {
|
||||
int x;
|
||||
int64 data_u, data_v;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2;
|
||||
@ -340,12 +340,12 @@ void YUVTORGB24Row_MSA(const uint8* src_y,
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
|
||||
void YUVTORGB565Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
void I422ToRGB565Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, dst0;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
@ -740,6 +740,54 @@ void ARGBToUVRow_MSA(const uint8* src_argb0,
|
||||
}
|
||||
}
|
||||
|
||||
// Packs a row of 4-byte pixels into 3-byte pixels, keeping bytes 0, 1 and 2
// of every source pixel in their original order and discarding byte 3
// (presumably the alpha channel - confirm against the ARGB layout in use).
//
// src_argb: source row, read 64 bytes (16 pixels) per iteration.
// dst_rgb:  destination row, written 48 bytes (16 pixels) per iteration.
// width:    pixel count. Work is done in whole groups of 16, so a width that
//           is not a multiple of 16 still reads and writes one full extra
//           group; nothing is processed when width <= 0.
void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  v16u8 in0, in1, in2, in3;
  v16u8 out0, out1, out2;
  // Byte-selection tables for __msa_vshf_b. Every index addresses one byte of
  // the 32-byte pair of neighbouring source vectors given to the shuffle
  // (see the MSA VSHF.B description for how indices 0-15 and 16-31 map onto
  // the two operands). Indices 3, 7, 11, ... never appear, which is what
  // removes the fourth byte of each pixel.
  v16i8 pick_first = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20};
  v16i8 pick_middle = {5,  6,  8,  9,  10, 12, 13, 14,
                       16, 17, 18, 20, 21, 22, 24, 25};
  v16i8 pick_last = {10, 12, 13, 14, 16, 17, 18, 20,
                     21, 22, 24, 25, 26, 28, 29, 30};

  for (remaining = width; remaining > 0; remaining -= 16) {
    // Fetch four vectors = 16 source pixels.
    in0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    in1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    in2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
    in3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);

    // Each output vector is assembled from two adjacent input vectors.
    out0 = (v16u8)__msa_vshf_b(pick_first, (v16i8)in1, (v16i8)in0);
    out1 = (v16u8)__msa_vshf_b(pick_middle, (v16i8)in2, (v16i8)in1);
    out2 = (v16u8)__msa_vshf_b(pick_last, (v16i8)in3, (v16i8)in2);

    // Emit the three packed vectors = 48 output bytes.
    ST_UB2(out0, out1, dst_rgb, 16);
    ST_UB(out2, (dst_rgb + 32));

    src_argb += 64;
    dst_rgb += 48;
  }
}
|
||||
|
||||
// Packs a row of 4-byte pixels into 3-byte pixels, emitting bytes 2, 1, 0 of
// every source pixel (i.e. the first three bytes in reversed order) and
// discarding byte 3 (presumably the alpha channel - confirm against the ARGB
// layout in use).
//
// src_argb: source row, read 64 bytes (16 pixels) per iteration.
// dst_rgb:  destination row, written 48 bytes (16 pixels) per iteration.
// width:    pixel count. Work is done in whole groups of 16, so a width that
//           is not a multiple of 16 still reads and writes one full extra
//           group; nothing is processed when width <= 0.
void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  v16u8 in0, in1, in2, in3;
  v16u8 out0, out1, out2;
  // Byte-selection tables for __msa_vshf_b. Every index addresses one byte of
  // the 32-byte pair of neighbouring source vectors given to the shuffle
  // (see the MSA VSHF.B description for how indices 0-15 and 16-31 map onto
  // the two operands). Within each pixel the indices run high-to-low, which
  // produces the byte reversal; indices 3, 7, 11, ... never appear.
  v16i8 pick_first = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22};
  v16i8 pick_middle = {5,  4,  10, 9,  8,  14, 13, 12,
                       18, 17, 16, 22, 21, 20, 26, 25};
  v16i8 pick_last = {8,  14, 13, 12, 18, 17, 16, 22,
                     21, 20, 26, 25, 24, 30, 29, 28};

  for (remaining = width; remaining > 0; remaining -= 16) {
    // Fetch four vectors = 16 source pixels.
    in0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    in1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    in2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
    in3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);

    // Each output vector is assembled from two adjacent input vectors.
    out0 = (v16u8)__msa_vshf_b(pick_first, (v16i8)in1, (v16i8)in0);
    out1 = (v16u8)__msa_vshf_b(pick_middle, (v16i8)in2, (v16i8)in1);
    out2 = (v16u8)__msa_vshf_b(pick_last, (v16i8)in3, (v16i8)in2);

    // Emit the three packed vectors = 48 output bytes.
    ST_UB2(out0, out1, dst_rgb, 16);
    ST_UB(out2, (dst_rgb + 32));

    src_argb += 64;
    dst_rgb += 48;
  }
}
|
||||
|
||||
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user