mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
UV subsampling on ARM uses a rounding average of 4 pixels
Performance on Samsung S22 Exynos

(SVE2+I8MM+DOTPROD+Neon) AArch64
ARGBToI400_Opt (168 ms)
ARGBToJ400_Opt (103 ms)
ABGRToJ400_Opt (81 ms)
RGBAToJ400_Opt (82 ms)
RGB24ToJ400_Opt (176 ms)
RAWToJ400_Opt (176 ms)
ABGRToI420_Opt (258 ms)
ARGBToI420_Opt (259 ms)
ARGBToI422_Opt (403 ms)
ARGBToI444_Opt (213 ms)
ARGBToJ420_Opt (257 ms)
ARGBToJ422_Opt (403 ms)
ARGBToJ444_Opt (214 ms)
ABGRToJ420_Opt (255 ms)
ABGRToJ422_Opt (399 ms)
ARGB4444ToI420_Opt (285 ms)
RGB565ToI420_Opt (316 ms)
ARGB1555ToI420_Opt (324 ms)
BGRAToI420_Opt (260 ms)
RAWToI420_Opt (303 ms)
RAWToI444_Opt (303 ms)
RAWToJ420_Opt (335 ms)
RAWToJ444_Opt (308 ms)
RGB24ToI420_Opt (372 ms)
RGB24ToJ420_Opt (365 ms)
RGBAToI420_Opt (259 ms)

AArch32 (Neon)
ARGBToI400_Opt (496 ms)
ARGBToJ400_Opt (478 ms)
ABGRToJ400_Opt (483 ms)
RGBAToJ400_Opt (493 ms)
RGB24ToJ400_Opt (343 ms)
RAWToJ400_Opt (341 ms)
ABGRToI420_Opt (993 ms)
ARGBToI420_Opt (992 ms)
ARGBToI422_Opt (1503 ms)
ARGBToI444_Opt (1257 ms)
ARGBToJ420_Opt (1006 ms)
ARGBToJ422_Opt (1521 ms)
ARGBToJ444_Opt (1267 ms)
ABGRToJ420_Opt (1002 ms)
ABGRToJ422_Opt (1504 ms)
ARGB4444ToI420_Opt (1180 ms)
RGB565ToI420_Opt (1112 ms)
ARGB1555ToI420_Opt (1115 ms)
BGRAToI420_Opt (993 ms)
RAWToI420_Opt (703 ms)
RAWToI444_Opt (1717 ms)
RAWToJ420_Opt (704 ms)
RAWToJ444_Opt (1739 ms)
RGB24ToI420_Opt (703 ms)
RGB24ToJ420_Opt (703 ms)
RGBAToI420_Opt (993 ms)

Bug: 381138208
Change-Id: I33728d5237f357362b0bfc509a9ebe6fe46f45d4
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6228987
Reviewed-by: Ben Weiss <bweiss@google.com>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
ccdf870348
commit
d32d19ccf2
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||
Version: 1903
|
||||
Version: 1904
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
Shipped: yes
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1903
|
||||
#define LIBYUV_VERSION 1904
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -662,15 +662,13 @@ static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
|
||||
// ARM uses uint16
|
||||
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
|
||||
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return STATIC_CAST(
|
||||
uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
|
||||
static __inline int RGBxToU(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
|
||||
}
|
||||
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return STATIC_CAST(
|
||||
uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
|
||||
static __inline int RGBxToV(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -713,7 +711,7 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
|
||||
} \
|
||||
}
|
||||
#else
|
||||
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
||||
// ARM version does average of 4 pixels with rounding
|
||||
#define MAKEROWY(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||
int x; \
|
||||
@ -729,27 +727,27 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
|
||||
int x; \
|
||||
for (x = 0; x < width - 1; x += 2) { \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
|
||||
src_rgb1[B + BPP] + 1) >> \
|
||||
1; \
|
||||
src_rgb1[B + BPP] + 2) >> \
|
||||
2; \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
|
||||
src_rgb1[G + BPP] + 1) >> \
|
||||
1; \
|
||||
src_rgb1[G + BPP] + 2) >> \
|
||||
2; \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
|
||||
src_rgb1[R + BPP] + 1) >> \
|
||||
1; \
|
||||
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
||||
src_rgb1[R + BPP] + 2) >> \
|
||||
2; \
|
||||
dst_u[0] = RGBxToU(ar, ag, ab); \
|
||||
dst_v[0] = RGBxToV(ar, ag, ab); \
|
||||
src_rgb += BPP * 2; \
|
||||
src_rgb1 += BPP * 2; \
|
||||
dst_u += 1; \
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint16_t ab = src_rgb[B] + src_rgb1[B]; \
|
||||
uint16_t ag = src_rgb[G] + src_rgb1[G]; \
|
||||
uint16_t ar = src_rgb[R] + src_rgb1[R]; \
|
||||
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1; \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1; \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1; \
|
||||
dst_u[0] = RGBxToU(ar, ag, ab); \
|
||||
dst_v[0] = RGBxToV(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
@ -806,11 +804,11 @@ static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
|
||||
}
|
||||
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
|
||||
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
|
||||
static __inline uint8_t RGBxToUJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
|
||||
}
|
||||
static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
|
||||
static __inline uint8_t RGBxToVJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -853,7 +851,7 @@ static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
} \
|
||||
}
|
||||
#else
|
||||
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
||||
// ARM version does average of 4 pixels with rounding
|
||||
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
|
||||
int x; \
|
||||
@ -869,27 +867,27 @@ static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
|
||||
int x; \
|
||||
for (x = 0; x < width - 1; x += 2) { \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
|
||||
src_rgb1[B + BPP] + 1) >> \
|
||||
1; \
|
||||
src_rgb1[B + BPP] + 2) >> \
|
||||
2; \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
|
||||
src_rgb1[G + BPP] + 1) >> \
|
||||
1; \
|
||||
src_rgb1[G + BPP] + 2) >> \
|
||||
2; \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
|
||||
src_rgb1[R + BPP] + 1) >> \
|
||||
1; \
|
||||
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
||||
src_rgb1[R + BPP] + 2) >> \
|
||||
2; \
|
||||
dst_u[0] = RGBxToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGBxToVJ(ar, ag, ab); \
|
||||
src_rgb += BPP * 2; \
|
||||
src_rgb1 += BPP * 2; \
|
||||
dst_u += 1; \
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
|
||||
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
|
||||
uint16_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1; \
|
||||
uint16_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1; \
|
||||
uint16_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1; \
|
||||
dst_u[0] = RGBxToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGBxToVJ(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -994,11 +992,11 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
|
||||
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
|
||||
dst_u[0] = RGB2xToU(r, g, b);
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
|
||||
uint16_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
|
||||
dst_u[0] = RGBxToU(r, g, b);
|
||||
dst_v[0] = RGBxToV(r, g, b);
|
||||
#endif
|
||||
|
||||
src_rgb565 += 4;
|
||||
@ -1029,11 +1027,11 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = b0 + b2;
|
||||
uint16_t g = g0 + g2;
|
||||
uint16_t r = r0 + r2;
|
||||
dst_u[0] = RGB2xToU(r, g, b);
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
uint16_t b = (b0 + b2 + 1) >> 1;
|
||||
uint16_t g = (g0 + g2 + 1) >> 1;
|
||||
uint16_t r = (r0 + r2 + 1) >> 1;
|
||||
dst_u[0] = RGBxToU(r, g, b);
|
||||
dst_v[0] = RGBxToV(r, g, b);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1083,11 +1081,11 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
|
||||
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
|
||||
dst_u[0] = RGB2xToU(r, g, b);
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
|
||||
uint16_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
|
||||
dst_u[0] = RGBxToU(r, g, b);
|
||||
dst_v[0] = RGBxToV(r, g, b);
|
||||
#endif
|
||||
|
||||
src_argb1555 += 4;
|
||||
@ -1119,11 +1117,11 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = b0 + b2;
|
||||
uint16_t g = g0 + g2;
|
||||
uint16_t r = r0 + r2;
|
||||
dst_u[0] = RGB2xToU(r, g, b);
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
uint16_t b = (b0 + b2 + 1) >> 1;
|
||||
uint16_t g = (g0 + g2 + 1) >> 1;
|
||||
uint16_t r = (r0 + r2 + 1) >> 1;
|
||||
dst_u[0] = RGBxToU(r, g, b);
|
||||
dst_v[0] = RGBxToV(r, g, b);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1169,11 +1167,11 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
|
||||
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
|
||||
dst_u[0] = RGB2xToU(r, g, b);
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
uint16_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
|
||||
uint16_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
|
||||
uint16_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
|
||||
dst_u[0] = RGBxToU(r, g, b);
|
||||
dst_v[0] = RGBxToV(r, g, b);
|
||||
#endif
|
||||
|
||||
src_argb4444 += 4;
|
||||
@ -1203,11 +1201,11 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
|
||||
dst_u[0] = RGBToU(ar, ag, ab);
|
||||
dst_v[0] = RGBToV(ar, ag, ab);
|
||||
#else
|
||||
uint16_t b = b0 + b2;
|
||||
uint16_t g = g0 + g2;
|
||||
uint16_t r = r0 + r2;
|
||||
dst_u[0] = RGB2xToU(r, g, b);
|
||||
dst_v[0] = RGB2xToV(r, g, b);
|
||||
uint16_t b = (b0 + b2 + 1) >> 1;
|
||||
uint16_t g = (g0 + g2 + 1) >> 1;
|
||||
uint16_t r = (r0 + r2 + 1) >> 1;
|
||||
dst_u[0] = RGBxToU(r, g, b);
|
||||
dst_v[0] = RGBxToV(r, g, b);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -1933,11 +1933,11 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
@ -1952,9 +1952,9 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -1971,7 +1971,6 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Subsample match Intel code.
|
||||
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
@ -1979,11 +1978,11 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
|
||||
"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
@ -1998,9 +1997,9 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2024,11 +2023,11 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
|
||||
"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
|
||||
@ -2043,9 +2042,9 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q2, q1, q0)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2070,11 +2069,11 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_rgb24
|
||||
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
|
||||
"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
|
||||
@ -2089,9 +2088,9 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2116,11 +2115,11 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_raw
|
||||
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
|
||||
"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
|
||||
@ -2135,9 +2134,9 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q2, q1, q0)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2161,11 +2160,11 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_bgra
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
|
||||
@ -2180,9 +2179,9 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
|
||||
"vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q1, q1, #1 \n" // 2x average
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q3, q3, #1 \n"
|
||||
"vrshr.u16 q1, q1, #2 \n" // average of 4
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
"vrshr.u16 q3, q3, #2 \n"
|
||||
|
||||
RGBTOUV(q3, q2, q1)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2190,7 +2189,7 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_bgra), // %0
|
||||
"+r"(src_stride_bgra), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_u), // %2-
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(width) // %4
|
||||
:
|
||||
@ -2206,11 +2205,11 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_abgr
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
|
||||
@ -2225,9 +2224,9 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q2, q1, q0)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2251,11 +2250,11 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_rgba
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
|
||||
@ -2270,9 +2269,9 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba,
|
||||
"vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2296,11 +2295,11 @@ void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_rgb24
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
|
||||
@ -2315,9 +2314,9 @@ void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2341,11 +2340,11 @@ void RAWToUVRow_NEON(const uint8_t* src_raw,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_raw
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
|
||||
@ -2360,9 +2359,9 @@ void RAWToUVRow_NEON(const uint8_t* src_raw,
|
||||
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"vrshr.u16 q0, q0, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q1, #1 \n"
|
||||
"vrshr.u16 q2, q2, #1 \n"
|
||||
"vrshr.u16 q0, q0, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q1, #2 \n"
|
||||
"vrshr.u16 q2, q2, #2 \n"
|
||||
|
||||
RGBTOUV(q2, q1, q0)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
@ -2387,12 +2386,11 @@ void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875
|
||||
// coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
|
||||
@ -2418,9 +2416,9 @@ void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
|
||||
"vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"vrshr.u16 q4, q4, #1 \n" // 2x average
|
||||
"vrshr.u16 q5, q5, #1 \n"
|
||||
"vrshr.u16 q6, q6, #1 \n"
|
||||
"vrshr.u16 q4, q4, #2 \n" // average of 4
|
||||
"vrshr.u16 q5, q5, #2 \n"
|
||||
"vrshr.u16 q6, q6, #2 \n"
|
||||
|
||||
"vmul.s16 q8, q4, q10 \n" // B
|
||||
"vmls.s16 q8, q5, q11 \n" // G
|
||||
@ -2453,12 +2451,11 @@ void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875
|
||||
// coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
|
||||
@ -2484,9 +2481,9 @@ void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
|
||||
"vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"vrshr.u16 q4, q4, #1 \n" // 2x average
|
||||
"vrshr.u16 q5, q5, #1 \n"
|
||||
"vrshr.u16 q6, q6, #1 \n"
|
||||
"vrshr.u16 q4, q4, #2 \n" // average of 4
|
||||
"vrshr.u16 q5, q5, #2 \n"
|
||||
"vrshr.u16 q6, q6, #2 \n"
|
||||
|
||||
"vmul.s16 q8, q4, q10 \n" // B
|
||||
"vmls.s16 q8, q5, q11 \n" // G
|
||||
@ -2519,12 +2516,11 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875
|
||||
// coefficient
|
||||
"vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
|
||||
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
|
||||
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
|
||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
|
||||
@ -2550,9 +2546,9 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
|
||||
"vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
|
||||
"vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
|
||||
|
||||
"vrshr.u16 q0, q4, #1 \n" // 2x average
|
||||
"vrshr.u16 q1, q5, #1 \n"
|
||||
"vrshr.u16 q2, q6, #1 \n"
|
||||
"vrshr.u16 q0, q4, #2 \n" // average of 4
|
||||
"vrshr.u16 q1, q5, #2 \n"
|
||||
"vrshr.u16 q2, q6, #2 \n"
|
||||
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
|
||||
@ -2853,13 +2853,13 @@ void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||
&kRgb24JPEGUVConstants);
|
||||
}
|
||||
|
||||
#define RGBTOUV_SETUP_REG \
|
||||
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
|
||||
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
|
||||
"movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
|
||||
"movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
|
||||
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
|
||||
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
|
||||
#define RGBTOUV_SETUP_REG \
|
||||
"movi v20.8h, #112 \n" /* UB/VR coefficient (0.875) */ \
|
||||
"movi v21.8h, #74 \n" /* UG coefficient (-0.5781) */ \
|
||||
"movi v22.8h, #38 \n" /* UR coefficient (-0.2969) */ \
|
||||
"movi v23.8h, #18 \n" /* VB coefficient (-0.1406) */ \
|
||||
"movi v24.8h, #94 \n" /* VG coefficient (-0.7344) */ \
|
||||
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
// clang-format off
|
||||
@ -2899,9 +2899,9 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -2918,7 +2918,6 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Subsample match Intel code.
|
||||
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
@ -2926,11 +2925,11 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
||||
int width) {
|
||||
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
||||
asm volatile (
|
||||
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
|
||||
"movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
|
||||
"movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
|
||||
"movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
|
||||
"movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
|
||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
||||
"1: \n"
|
||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
||||
@ -2945,9 +2944,9 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -2971,11 +2970,11 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
|
||||
int width) {
|
||||
const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
|
||||
asm volatile (
|
||||
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
|
||||
"movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
|
||||
"movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
|
||||
"movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
|
||||
"movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
|
||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
||||
"1: \n"
|
||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
||||
@ -2990,9 +2989,9 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v2.8h, v1.8h, v0.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3016,11 +3015,11 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
|
||||
int width) {
|
||||
const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
|
||||
asm volatile (
|
||||
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
|
||||
"movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
|
||||
"movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
|
||||
"movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
|
||||
"movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
|
||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
||||
"1: \n"
|
||||
"ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
|
||||
@ -3035,9 +3034,9 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3061,11 +3060,11 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw,
|
||||
int width) {
|
||||
const uint8_t* src_raw_1 = src_raw + src_stride_raw;
|
||||
asm volatile (
|
||||
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
|
||||
"movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
|
||||
"movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
|
||||
"movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
|
||||
"movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
|
||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
||||
"1: \n"
|
||||
"ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
|
||||
@ -3080,9 +3079,9 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v2.8h, v1.8h, v0.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3120,9 +3119,9 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
|
||||
"uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v3.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v3.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3160,9 +3159,9 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr,
|
||||
"uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v3.8h, #1 \n" // 2x average
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v0.8h, v3.8h, #2 \n" // average of 4
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v2.8h, v1.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3200,9 +3199,9 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba,
|
||||
"uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3240,9 +3239,9 @@ void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v0.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3280,9 +3279,9 @@ void RAWToUVRow_NEON(const uint8_t* src_raw,
|
||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v2.8h, v2.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v1.8h, #1 \n"
|
||||
"urshr v0.8h, v0.8h, #1 \n"
|
||||
"urshr v2.8h, v2.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v1.8h, #2 \n"
|
||||
"urshr v0.8h, v0.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v2.8h, v1.8h, v0.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3324,9 +3323,9 @@ void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
|
||||
"uadalp v17.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v18.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v16.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v17.8h, #1 \n"
|
||||
"urshr v2.8h, v18.8h, #1 \n"
|
||||
"urshr v0.8h, v16.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v17.8h, #2 \n"
|
||||
"urshr v2.8h, v18.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3368,9 +3367,9 @@ void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
|
||||
"uadalp v17.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v18.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v16.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v17.8h, #1 \n"
|
||||
"urshr v2.8h, v18.8h, #1 \n"
|
||||
"urshr v0.8h, v16.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v17.8h, #2 \n"
|
||||
"urshr v2.8h, v18.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
@ -3412,9 +3411,9 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
|
||||
"uadalp v17.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
|
||||
"uadalp v18.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||
|
||||
"urshr v0.8h, v16.8h, #1 \n" // 2x average
|
||||
"urshr v1.8h, v17.8h, #1 \n"
|
||||
"urshr v2.8h, v18.8h, #1 \n"
|
||||
"urshr v0.8h, v16.8h, #2 \n" // average of 4
|
||||
"urshr v1.8h, v17.8h, #2 \n"
|
||||
"urshr v2.8h, v18.8h, #2 \n"
|
||||
|
||||
RGBTOUV(v0.8h, v1.8h, v2.8h)
|
||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||
|
||||
@ -205,32 +205,32 @@ void NV21ToRGB24Row_SVE2(const uint8_t* src_y,
|
||||
|
||||
static const int16_t kARGBToUVCoefficients[] = {
|
||||
// UB, -UR, -UG, 0, -VB, VR, -VG, 0
|
||||
56, -19, -37, 0, -9, 56, -47, 0,
|
||||
112, -38, -74, 0, -18, 112, -94, 0,
|
||||
};
|
||||
|
||||
static const int16_t kRGBAToUVCoefficients[] = {
|
||||
// 0, -UG, UB, -UR, 0, -VG, -VB, VR
|
||||
0, -37, 56, -19, 0, -47, -9, 56,
|
||||
0, -74, 112, -38, 0, -94, -18, 112,
|
||||
};
|
||||
|
||||
static const int16_t kBGRAToUVCoefficients[] = {
|
||||
// 0, -UG, -UR, UB, 0, -VG, VR, -VB
|
||||
0, -37, -19, 56, 0, -47, 56, -9,
|
||||
0, -74, -38, 112, 0, -94, 112, -18,
|
||||
};
|
||||
|
||||
static const int16_t kABGRToUVCoefficients[] = {
|
||||
// -UR, UB, -UG, 0, VR, -VB, -VG, 0
|
||||
-19, 56, -37, 0, 56, -9, -47, 0,
|
||||
-38, 112, -74, 0, 112, -18, -94, 0,
|
||||
};
|
||||
|
||||
static const int16_t kARGBToUVJCoefficients[] = {
|
||||
// UB, -UR, -UG, 0, -VB, VR, -VG, 0
|
||||
63, -21, -42, 0, -10, 63, -53, 0,
|
||||
127, -43, -84, 0, -20, 127, -107, 0,
|
||||
};
|
||||
|
||||
static const int16_t kABGRToUVJCoefficients[] = {
|
||||
// -UR, UB, -UG, 0, VR, -VB, -VG, 0
|
||||
-21, 63, -42, 0, 63, -10, -53, 0,
|
||||
-43, 127, -84, 0, 127, -20, -107, 0,
|
||||
};
|
||||
|
||||
static void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb,
|
||||
@ -285,10 +285,15 @@ static void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb,
|
||||
|
||||
"subs %w[width], %w[width], %w[vl] \n" // 4*VL per loop
|
||||
|
||||
"urhadd z0.h, p0/m, z0.h, z1.h \n" // brgabrga
|
||||
"urhadd z2.h, p0/m, z2.h, z3.h \n" // brgabrga
|
||||
"urhadd z4.h, p0/m, z4.h, z5.h \n" // brgabrga
|
||||
"urhadd z6.h, p0/m, z6.h, z7.h \n" // brgabrga
|
||||
"add z0.h, p0/m, z0.h, z1.h \n" // brgabrga
|
||||
"add z2.h, p0/m, z2.h, z3.h \n" // brgabrga
|
||||
"add z4.h, p0/m, z4.h, z5.h \n" // brgabrga
|
||||
"add z6.h, p0/m, z6.h, z7.h \n" // brgabrga
|
||||
|
||||
"urshr z0.h, p0/m, z0.h, #2 \n" // brgabrga
|
||||
"urshr z2.h, p0/m, z2.h, #2 \n" // brgabrga
|
||||
"urshr z4.h, p0/m, z4.h, #2 \n" // brgabrga
|
||||
"urshr z6.h, p0/m, z6.h, #2 \n" // brgabrga
|
||||
|
||||
"movi v16.8h, #0 \n"
|
||||
"movi v17.8h, #0 \n"
|
||||
@ -350,7 +355,9 @@ static void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb,
|
||||
"trn1 z0.s, z16.s, z17.s \n" // brgabgra
|
||||
"trn2 z1.s, z16.s, z17.s \n" // brgabgra
|
||||
|
||||
"urhadd z0.h, p0/m, z0.h, z1.h \n" // brgabrga
|
||||
"add z0.h, p0/m, z0.h, z1.h \n" // brgabrga
|
||||
|
||||
"urshr z0.h, p0/m, z0.h, #2 \n" // brgabrga
|
||||
|
||||
"subs %w[width], %w[width], %w[vl] \n" // VL per loop
|
||||
|
||||
|
||||
@ -2076,7 +2076,7 @@ TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
|
||||
}
|
||||
|
||||
uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
|
||||
EXPECT_EQ(2755440272u, checksum);
|
||||
EXPECT_EQ(4157186353u, checksum);
|
||||
|
||||
free_aligned_buffer_page_end(orig_rgb24);
|
||||
free_aligned_buffer_page_end(dest_j420);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user