mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBToJ444 and RAWToJ444 NEON
- Pass JPEG matrix to ARGBToUV444MatrixRow_NEON
- Remove NEON unsigned constants in favor of DOTPROD signed constants

Samsung S23:
Was C for UV
  ARGBToJ444_Opt (320 ms)
  RAWToJ444_Opt (411 ms)
Now I8MM
  ARGBToJ444_Opt (196 ms)
  RAWToJ444_Opt (301 ms)
NEON
  ARGBToJ444_Opt (505 ms)
  RAWToJ444_Opt (596 ms)
32 bit ARM NEON
  ARGBToJ444_Opt (1135 ms)
  RAWToJ444_Opt (1546 ms)

Profile of RAWToJ444
  37.72% ARGBToUVJ444Row_NEON_I8MM
  34.48% RAWToARGBRow_NEON
  14.65% ARGBToYJRow_NEON_DotProd

Bug: 390247964
Change-Id: Ia26240bee974a0baf502548f2fc896b193c3006c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6220890
Reviewed-by: Ben Weiss <bweiss@google.com>
This commit is contained in:
parent
1c938f342d
commit
96f98f6915
@ -395,6 +395,7 @@ extern "C" {
|
||||
#define HAS_ARGBTORGB565DITHERROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVJ444ROW_NEON
|
||||
#define HAS_ARGBTOUVJROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOYJROW_NEON
|
||||
@ -537,6 +538,7 @@ extern "C" {
|
||||
|
||||
#define HAS_ARGBCOLORMATRIXROW_NEON_I8MM
|
||||
#define HAS_ARGBTOUV444ROW_NEON_I8MM
|
||||
#define HAS_ARGBTOUVJ444ROW_NEON_I8MM
|
||||
#endif
|
||||
|
||||
// The following are available on AArch64 SVE platforms:
|
||||
@ -1858,6 +1860,14 @@ void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
@ -2369,6 +2379,14 @@ void ARGBToUV444Row_Any_NEON_I8MM(const uint8_t* src_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJ444Row_Any_NEON(const uint8_t* src_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJ444Row_Any_NEON_I8MM(const uint8_t* src_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
|
||||
int src_stride,
|
||||
uint8_t* dst_u,
|
||||
|
||||
@ -2155,11 +2155,13 @@ ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_NEON
|
||||
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
|
||||
ANY12(ARGBToUVJ444Row_Any_NEON, ARGBToUVJ444Row_NEON, 0, 4, 0, 7)
|
||||
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
|
||||
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUV444ROW_NEON_I8MM
|
||||
ANY12(ARGBToUV444Row_Any_NEON_I8MM, ARGBToUV444Row_NEON_I8MM, 0, 4, 0, 7)
|
||||
ANY12(ARGBToUVJ444Row_Any_NEON_I8MM, ARGBToUVJ444Row_NEON_I8MM, 0, 4, 0, 7)
|
||||
#endif
|
||||
#ifdef HAS_YUY2TOUV422ROW_MSA
|
||||
ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
|
||||
|
||||
@ -1822,8 +1822,8 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
|
||||
}
|
||||
|
||||
struct RgbUVConstants {
|
||||
uint8_t kRGBToU[4];
|
||||
uint8_t kRGBToV[4];
|
||||
int8_t kRGBToU[4];
|
||||
int8_t kRGBToV[4];
|
||||
};
|
||||
|
||||
// 8x1 pixels.
|
||||
@ -1847,12 +1847,12 @@ static void ARGBToUV444MatrixRow_NEON(
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vmull.u8 q2, d0, d24 \n" // B
|
||||
"vmlsl.u8 q2, d1, d25 \n" // G
|
||||
"vmlsl.u8 q2, d2, d26 \n" // R
|
||||
"vmlal.u8 q2, d1, d25 \n" // G
|
||||
"vmlal.u8 q2, d2, d26 \n" // R
|
||||
|
||||
"vmull.u8 q3, d2, d24 \n" // R
|
||||
"vmlsl.u8 q3, d1, d28 \n" // G
|
||||
"vmlsl.u8 q3, d0, d27 \n" // B
|
||||
"vmlal.u8 q3, d1, d28 \n" // G
|
||||
"vmlal.u8 q3, d0, d27 \n" // B
|
||||
|
||||
"vaddhn.u16 d0, q2, q15 \n" // +128 -> unsigned
|
||||
"vaddhn.u16 d1, q3, q15 \n" // +128 -> unsigned
|
||||
@ -1871,14 +1871,14 @@ static void ARGBToUV444MatrixRow_NEON(
|
||||
|
||||
// RGB to bt601 coefficients
|
||||
// UB 0.875 coefficient = 112
|
||||
// UG -0.5781 coefficient = 74
|
||||
// UR -0.2969 coefficient = 38
|
||||
// VB -0.1406 coefficient = 18
|
||||
// VG -0.7344 coefficient = 94
|
||||
// VR 0.875 coefficient = 112 (ignored)
|
||||
// UG -0.5781 coefficient = -74
|
||||
// UR -0.2969 coefficient = -38
|
||||
// VB -0.1406 coefficient = -18
|
||||
// VG -0.7344 coefficient = -94
|
||||
// VR 0.875 coefficient = 112
|
||||
|
||||
static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
|
||||
{18, 94, 112, 0}};
|
||||
static const struct RgbUVConstants kRgb24I601UVConstants = {{112, -74, -38, 0},
|
||||
{-18, -94, 112, 0}};
|
||||
|
||||
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
@ -1888,6 +1888,26 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
||||
&kRgb24I601UVConstants);
|
||||
}
|
||||
|
||||
// RGB to JPEG coefficients
|
||||
// UB 0.500 coefficient = 127
|
||||
// UG -0.33126 coefficient = -84
|
||||
// UR -0.16874 coefficient = -43
|
||||
// VB -0.08131 coefficient = -20
|
||||
// VG -0.41869 coefficient = -107
|
||||
// VR 0.500 coefficient = 127
|
||||
|
||||
static const struct RgbUVConstants kRgb24JPEGUVConstants = {
|
||||
{127, -84, -43, 0},
|
||||
{-20, -107, 127, 0}};
|
||||
|
||||
void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
|
||||
&kRgb24JPEGUVConstants);
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
|
||||
@ -2710,12 +2710,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
|
||||
);
|
||||
}
|
||||
|
||||
struct RgbUVConstantsU8 {
|
||||
uint8_t kRGBToU[4];
|
||||
uint8_t kRGBToV[4];
|
||||
};
|
||||
|
||||
struct RgbUVConstantsI8 {
|
||||
struct RgbUVConstants {
|
||||
int8_t kRGBToU[4];
|
||||
int8_t kRGBToV[4];
|
||||
};
|
||||
@ -2726,7 +2721,7 @@ static void ARGBToUV444MatrixRow_NEON(
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstantsU8* rgbuvconstants) {
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
asm volatile(
|
||||
"ldr d0, [%4] \n" // load rgbuvconstants
|
||||
"dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient
|
||||
@ -2740,13 +2735,13 @@ static void ARGBToUV444MatrixRow_NEON(
|
||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
|
||||
"subs %w3, %w3, #8 \n" // 8 processed per loop.
|
||||
"umull v4.8h, v0.8b, v24.8b \n" // B
|
||||
"umlsl v4.8h, v1.8b, v25.8b \n" // G
|
||||
"umlsl v4.8h, v2.8b, v26.8b \n" // R
|
||||
"umlal v4.8h, v1.8b, v25.8b \n" // G
|
||||
"umlal v4.8h, v2.8b, v26.8b \n" // R
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
|
||||
"umull v3.8h, v2.8b, v24.8b \n" // R
|
||||
"umlsl v3.8h, v1.8b, v28.8b \n" // G
|
||||
"umlsl v3.8h, v0.8b, v27.8b \n" // B
|
||||
"umlal v3.8h, v1.8b, v28.8b \n" // G
|
||||
"umlal v3.8h, v0.8b, v27.8b \n" // B
|
||||
|
||||
"addhn v0.8b, v4.8h, v29.8h \n" // +128 -> unsigned
|
||||
"addhn v1.8b, v3.8h, v29.8h \n" // +128 -> unsigned
|
||||
@ -2768,7 +2763,7 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width,
|
||||
const struct RgbUVConstantsI8* rgbuvconstants) {
|
||||
const struct RgbUVConstants* rgbuvconstants) {
|
||||
asm("ld2r {v16.4s, v17.4s}, [%[rgbuvconstants]] \n"
|
||||
"movi v29.16b, #0x80 \n" // 128.5
|
||||
"1: \n"
|
||||
@ -2801,25 +2796,21 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
|
||||
|
||||
// RGB to bt601 coefficients
|
||||
// UB 0.875 coefficient = 112
|
||||
// UG -0.5781 coefficient = 74
|
||||
// UR -0.2969 coefficient = 38
|
||||
// VB -0.1406 coefficient = 18
|
||||
// VG -0.7344 coefficient = 94
|
||||
// VR 0.875 coefficient = 112 (ignored)
|
||||
// UG -0.5781 coefficient = -74
|
||||
// UR -0.2969 coefficient = -38
|
||||
// VB -0.1406 coefficient = -18
|
||||
// VG -0.7344 coefficient = -94
|
||||
// VR 0.875 coefficient = 112
|
||||
|
||||
static const struct RgbUVConstantsU8 kRgb24I601UVConstantsU8 = {
|
||||
{112, 74, 38, 0},
|
||||
{18, 94, 112, 0}};
|
||||
static const struct RgbUVConstantsI8 kRgb24I601UVConstantsI8 = {
|
||||
{112, -74, -38, 0},
|
||||
{-18, -94, 112, 0}};
|
||||
static const struct RgbUVConstants kRgb24I601UVConstants = {{112, -74, -38, 0},
|
||||
{-18, -94, 112, 0}};
|
||||
|
||||
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
|
||||
&kRgb24I601UVConstantsU8);
|
||||
&kRgb24I601UVConstants);
|
||||
}
|
||||
|
||||
void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||
@ -2827,7 +2818,35 @@ void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width,
|
||||
&kRgb24I601UVConstantsI8);
|
||||
&kRgb24I601UVConstants);
|
||||
}
|
||||
|
||||
// RGB to JPEG coefficients
|
||||
// UB 0.500 coefficient = 127
|
||||
// UG -0.33126 coefficient = -84
|
||||
// UR -0.16874 coefficient = -43
|
||||
// VB -0.08131 coefficient = -20
|
||||
// VG -0.41869 coefficient = -107
|
||||
// VR 0.500 coefficient = 127
|
||||
|
||||
static const struct RgbUVConstants kRgb24JPEGUVConstants = {
|
||||
{127, -84, -43, 0},
|
||||
{-20, -107, 127, 0}};
|
||||
|
||||
void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
|
||||
&kRgb24JPEGUVConstants);
|
||||
}
|
||||
|
||||
void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width) {
|
||||
ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width,
|
||||
&kRgb24JPEGUVConstants);
|
||||
}
|
||||
|
||||
#define RGBTOUV_SETUP_REG \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user