Refactor NEON YUVToRGB, remove subsampling

Refactor NEON YUVToRGB Assembly to support HBD data as input and output.
Work on YUV444 internally, remove subsampling in I444ToARGB.

libyuv_unittest --gtest_filter=*.NV??ToARGB_Opt:*UYVYToARGB_Opt:*YUY2ToARGB_Opt:*I4*ToARGB_Opt

Bug: libyuv:895, libyuv:862, libyuv:863
Change-Id: I05b56ea8ea56d9e523720b842fa6e4b122ed4115
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2810060
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Yuan Tong 2021-04-14 00:18:41 +08:00 committed by Frank Barchard
parent 287158925b
commit 590c17ce40
5 changed files with 824 additions and 1172 deletions

View File

@ -746,23 +746,11 @@ typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32];
#endif
#if defined(__aarch64__)
// This struct is for Arm64 color conversion.
#if defined(__aarch64__) || defined(__arm__)
// This struct is for ARM color conversion.
struct YuvConstants {
uvec16 kUVToRB;
uvec16 kUVToRB2;
uvec16 kUVToG;
uvec16 kUVToG2;
vec16 kUVBiasBGR;
vec32 kYToRgb;
};
#elif defined(__arm__)
// This struct is for ArmV7 color conversion.
struct YuvConstants {
uvec8 kUVToRB;
uvec8 kUVToG;
vec16 kUVBiasBGR;
vec32 kYToRgb;
uvec8 kUVCoeff;
vec16 kRGBCoeffBias;
};
#else
// This struct is for Intel color conversion.

View File

@ -1420,64 +1420,48 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// Macros to create SIMD specific yuv to rgb conversion constants.
#if defined(__aarch64__)
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
{UB, VR, UB, VR, UB, VR, UB, VR}, {UB, VR, UB, VR, UB, VR, UB, VR}, \
{UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, \
{BB, BG, BR, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}}; \
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
{VR, UB, VR, UB, VR, UB, VR, UB}, {VR, UB, VR, UB, VR, UB, VR, UB}, \
{VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, \
{BR, BG, BB, YB, 0, 0, 0, 0}, {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
{UB, UB, UB, UB, VR, VR, VR, VR, 0, 0, 0, 0, 0, 0, 0, 0}, \
{UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, \
{BB, BG, BR, YB, 0, 0, 0, 0}, \
{0x0101 * YG, YG, 0, 0}}; \
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
{VR, VR, VR, VR, UB, UB, UB, UB, 0, 0, 0, 0, 0, 0, 0, 0}, \
{VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, \
{BR, BG, BB, YB, 0, 0, 0, 0}, \
{0x0101 * YG, YG, 0, 0}};
// clang-format off
#if defined(__aarch64__) || defined(__arm__)
#define YUBCONSTANTSBODY(YG, YB, UB, UG, VG, VR, BB, BG, BR) \
{{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
{YG, BB, BG, BR, YB, 0, 0, 0}}
#else
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \
{-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, \
-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0}, \
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
{0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, \
0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR}, \
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}; \
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \
{-VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, \
-VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0}, \
{VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, \
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, \
{0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, \
0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB}, \
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}};
#define YUBCONSTANTSBODY(YG, YB, UB, UG, VG, VR, BB, BG, BR) \
{{-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, \
-UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0, -UB, 0}, \
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
{0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, \
0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0, -VR}, \
{BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \
{BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \
{BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif
// clang-format on
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR, BB, BG, BR) \
const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
YUBCONSTANTSBODY(YG, YB, UB, UG, VG, VR, BB, BG, BR); \
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
YUBCONSTANTSBODY(YG, YB, VR, VG, UG, UB, BR, BG, BB);
// TODO(fbarchard): Generate SIMD structures from float matrix.
// Bias values to round, and subtract 128 from U and V.
// For B and R this is negative. For G this is positive.
#ifdef LIBYUV_UNLIMITED_DATA
#define BB (UB * 128 - YB)
#define BG (UG * 128 + VG * 128 + YB)
#define BR (VR * 128 - YB)
#else
#define BB (-UB * 128 + YB)
#define BG (UG * 128 + VG * 128 + YB)
#define BR (-VR * 128 + YB)
#endif
// BT.601 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.596
@ -1486,7 +1470,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
#if LIBYUV_UNLIMITED_DATA
#ifdef LIBYUV_UNLIMITED_DATA
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
@ -1540,7 +1524,7 @@ MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
#if LIBYUV_UNLIMITED_DATA
#ifdef LIBYUV_UNLIMITED_DATA
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
@ -1594,7 +1578,7 @@ MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR, BB, BG, BR)
// KR = 0.2627; KB = 0.0593
// U and V contributions to R,G,B.
#if LIBYUV_UNLIMITED_DATA
#ifdef LIBYUV_UNLIMITED_DATA
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
@ -1646,7 +1630,39 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR, BB, BG, BR)
#undef MAKEYUVCONSTANTS
#if LIBYUV_UNLIMITED_DATA
#if defined(__aarch64__) || defined(__arm__)
#ifdef LIBYUV_UNLIMITED_DATA
#define LOAD_YUV_CONSTANTS \
int ub = -yuvconstants->kUVCoeff[0]; \
int vr = -yuvconstants->kUVCoeff[1]; \
int ug = yuvconstants->kUVCoeff[2]; \
int vg = yuvconstants->kUVCoeff[3]; \
int yg = yuvconstants->kRGBCoeffBias[0]; \
int bb = -yuvconstants->kRGBCoeffBias[1]; \
int bg = yuvconstants->kRGBCoeffBias[2]; \
int br = -yuvconstants->kRGBCoeffBias[3]
#else
#define LOAD_YUV_CONSTANTS \
int ub = -yuvconstants->kUVCoeff[0]; \
int vr = -yuvconstants->kUVCoeff[1]; \
int ug = yuvconstants->kUVCoeff[2]; \
int vg = yuvconstants->kUVCoeff[3]; \
int yg = yuvconstants->kRGBCoeffBias[0]; \
int bb = yuvconstants->kRGBCoeffBias[1]; \
int bg = yuvconstants->kRGBCoeffBias[2]; \
int br = yuvconstants->kRGBCoeffBias[3]
#endif
#else
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVToB[0]; \
int ug = yuvconstants->kUVToG[0]; \
int vg = yuvconstants->kUVToG[1]; \
int vr = yuvconstants->kUVToR[1]; \
int bb = yuvconstants->kUVBiasB[0]; \
int bg = yuvconstants->kUVBiasG[0]; \
int br = yuvconstants->kUVBiasR[0]; \
int yg = yuvconstants->kYToRgb[0]
#endif
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 16 bit.
@ -1657,85 +1673,13 @@ static __inline void YuvPixel(uint8_t y,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = -yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = Clamp((int32_t)(y1 + (u * ub) + bb) >> 6);
*g = Clamp((int32_t)(y1 - (u * ug + v * vg) + bg) >> 6);
*r = Clamp((int32_t)(y1 + (v * vr) + br) >> 6);
}
#else
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
LOAD_YUV_CONSTANTS;
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = Clamp((int32_t)(y1 - (u * ub) + bb) >> 6);
*g = Clamp((int32_t)(y1 - (u * ug + v * vg) + bg) >> 6);
*r = Clamp((int32_t)(y1 - (v * vr) + br) >> 6);
}
#endif
// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel8_16(uint8_t y,
@ -1745,34 +1689,7 @@ static __inline void YuvPixel8_16(uint8_t y,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
LOAD_YUV_CONSTANTS;
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
*b = (int)(y1 - (u * ub) + bb);
@ -1789,34 +1706,7 @@ static __inline void YuvPixel10_16(uint16_t y,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
LOAD_YUV_CONSTANTS;
uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
u = clamp255(u >> 2);
@ -1835,34 +1725,7 @@ static __inline void YuvPixel12_16(int16_t y,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
LOAD_YUV_CONSTANTS;
uint32_t y1 = (uint32_t)((y << 4) * yg) >> 16;
u = clamp255(u >> 4);
@ -1917,34 +1780,7 @@ static __inline void YuvPixel16_8(uint16_t y,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
LOAD_YUV_CONSTANTS;
uint32_t y1 = (uint32_t)(y * yg) >> 16;
u = clamp255(u >> 8);
@ -1963,34 +1799,7 @@ static __inline void YuvPixel16_16(uint16_t y,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[1];
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
LOAD_YUV_CONSTANTS;
uint32_t y1 = (uint32_t)(y * yg) >> 16;
u = clamp255(u >> 8);
@ -2008,8 +1817,8 @@ static __inline void YPixel(uint8_t y,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__)
int ygb = yuvconstants->kUVBiasBGR[3];
int yg = yuvconstants->kYToRgb[1];
int yg = yuvconstants->kRGBCoeffBias[0];
int ygb = yuvconstants->kRGBCoeffBias[4];
#else
int ygb = yuvconstants->kYBiasToRgb[0];
int yg = yuvconstants->kYToRgb[0];
@ -2020,38 +1829,6 @@ static __inline void YPixel(uint8_t y,
*r = Clamp(((int32_t)(y1) + ygb) >> 6);
}
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
src_v += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
#else
void I444ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -2069,7 +1846,6 @@ void I444ToARGBRow_C(const uint8_t* src_y,
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
@ -2415,40 +2191,6 @@ void I422ToAR30Row_C(const uint8_t* src_y,
}
}
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
yuvconstants);
rgb_buf[3] = src_a[0];
YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
yuvconstants);
rgb_buf[7] = src_a[1];
src_y += 2;
src_u += 2;
src_v += 2;
src_a += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = src_a[0];
}
}
#else
void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -2468,7 +2210,6 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y,
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
void I422AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,

View File

@ -21,90 +21,138 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
// q0: Y uint16x8_t
// d2: U uint8x8_t
// d3: V uint8x8_t
// Read 8 Y, 4 U and 4 V from 422
#define READYUV422 \
"vld1.8 {d0}, [%0]! \n" \
"vld1.32 {d2[0]}, [%1]! \n" \
"vld1.32 {d2[1]}, [%2]! \n"
"vld1.8 {d0}, [%[src_y]]! \n" \
"vld1.32 {d2[0]}, [%[src_u]]! \n" \
"vld1.32 {d2[1]}, [%[src_v]]! \n" \
"vmov.u8 d1, d0 \n" \
"vmovl.u8 q1, d2 \n" \
"vzip.u8 d0, d1 \n" \
"vsli.u16 q1, q1, #8 \n"
// Read 8 Y, 8 U and 8 V from 444
#define READYUV444 \
"vld1.8 {d0}, [%0]! \n" \
"vld1.8 {d2}, [%1]! \n" \
"vld1.8 {d3}, [%2]! \n" \
"vpaddl.u8 q1, q1 \n" \
"vrshrn.u16 d2, q1, #1 \n"
"vld1.8 {d0}, [%[src_y]]! \n" \
"vld1.8 {d2}, [%[src_u]]! \n" \
"vmovl.u8 q0, d0 \n" \
"vld1.8 {d3}, [%[src_v]]! \n" \
"vsli.u16 q0, q0, #8 \n"
// Read 8 Y, and set 4 U and 4 V to 128
#define READYUV400 \
"vld1.8 {d0}, [%0]! \n" \
"vmov.u8 d2, #128 \n"
"vld1.8 {d0}, [%[src_y]]! \n" \
"vmov.u8 q1, #128 \n" \
"vmovl.u8 q0, d0 \n" \
"vsli.u16 q0, q0, #8 \n"
// Read 8 Y and 4 UV from NV12
#define READNV12 \
"vld1.8 {d0}, [%0]! \n" \
"vld1.8 {d2}, [%1]! \n" \
"vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
#define READNV12 \
"vld1.8 {d0}, [%[src_y]]! \n" \
"vld1.8 {d2}, [%[src_uv]]! \n" \
"vmov.u8 d1, d0 \n" \
"vmov.u8 d3, d2 \n" \
"vzip.u8 d0, d1 \n" \
"vsli.u16 d2, d2, #8 \n" /* Duplicate low byte (U) */ \
"vsri.u16 d3, d3, #8 \n" /* Duplicate high byte (V) */
// Read 8 Y and 4 VU from NV21
#define READNV21 \
"vld1.8 {d0}, [%0]! \n" \
"vld1.8 {d2}, [%1]! \n" \
"vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \
"vuzp.u8 d3, d2 \n" \
"vtrn.u32 d2, d3 \n"
"vld1.8 {d0}, [%[src_y]]! \n" \
"vld1.8 {d2}, [%[src_vu]]! \n" \
"vmov.u8 d1, d0 \n" \
"vmov.u8 d3, d2 \n" \
"vzip.u8 d0, d1 \n" \
"vsri.u16 d2, d2, #8 \n" /* Duplicate high byte (U) */ \
"vsli.u16 d3, d3, #8 \n" /* Duplicate low byte (V) */
// Read 8 YUY2
#define READYUY2 \
"vld2.8 {d0, d2}, [%0]! \n" \
"vld2.8 {d0, d2}, [%[src_yuy2]]! \n" \
"vmovl.u8 q0, d0 \n" \
"vmov.u8 d3, d2 \n" \
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
"vsli.u16 q0, q0, #8 \n" \
"vsli.u16 d2, d2, #8 \n" \
"vsri.u16 d3, d3, #8 \n"
// Read 8 UYVY
#define READUYVY \
"vld2.8 {d2, d3}, [%0]! \n" \
"vmov.u8 d0, d3 \n" \
"vld2.8 {d2, d3}, [%[src_uyvy]]! \n" \
"vmovl.u8 q0, d3 \n" \
"vmov.u8 d3, d2 \n" \
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
"vsli.u16 q0, q0, #8 \n" \
"vsli.u16 d2, d2, #8 \n" \
"vsri.u16 d3, d3, #8 \n"
#define YUVTORGB_SETUP \
"vld1.8 {d24}, [%[kUVToRB]] \n" \
"vld1.8 {d25}, [%[kUVToG]] \n" \
"vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \
"vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \
"vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \
"vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
#define YUVTORGB_SETUP \
"vld4.8 {d26[], d27[], d28[], d29[]}, [%[kUVCoeff]] \n" \
"vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
"vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n"
#define YUVTORGB \
"vmull.u8 q8, d2, d24 \n" /* u/v B/R component */ \
"vmull.u8 q9, d2, d25 \n" /* u/v G component */ \
"vmovl.u8 q0, d0 \n" /* Y */ \
"vmovl.s16 q10, d1 \n" \
"vmovl.s16 q0, d0 \n" \
"vmul.s32 q10, q10, q15 \n" \
"vmul.s32 q0, q0, q15 \n" \
"vqshrun.s32 d0, q0, #16 \n" \
"vqshrun.s32 d1, q10, #16 \n" /* Y */ \
"vadd.s16 d18, d19 \n" \
"vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */ \
"vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */ \
"vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/ \
"vaddw.u16 q1, q1, d16 \n" \
"vaddw.u16 q10, q10, d17 \n" \
"vaddw.u16 q3, q3, d18 \n" \
"vqadd.s16 q8, q0, q13 \n" /* B */ \
"vqadd.s16 q9, q0, q14 \n" /* R */ \
"vqadd.s16 q0, q0, q4 \n" /* G */ \
"vqadd.s16 q8, q8, q1 \n" /* B */ \
"vqadd.s16 q9, q9, q10 \n" /* R */ \
"vqsub.s16 q0, q0, q3 \n" /* G */ \
"vqshrun.s16 d20, q8, #6 \n" /* B */ \
"vqshrun.s16 d22, q9, #6 \n" /* R */ \
"vqshrun.s16 d21, q0, #6 \n" /* G */
// q0: B uint16x8_t
// q1: G uint16x8_t
// q2: R uint16x8_t
#ifdef LIBYUV_UNLIMITED_DATA
// Convert from YUV to 2.14 fixed point RGB
#define YUVTORGB \
"vmull.u16 q2, d1, d31 \n" \
"vmull.u8 q8, d3, d29 \n" /* DGV */ \
"vmull.u16 q0, d0, d31 \n" \
"vmlal.u8 q8, d2, d28 \n" /* DG */ \
"vqshrn.u32 d0, q0, #16 \n" \
"vqshrn.u32 d1, q2, #16 \n" /* Y */ \
"vmull.u8 q9, d2, d26 \n" /* DB */ \
"vmull.u8 q2, d3, d27 \n" /* DR */ \
"vadd.u16 q4, q0, q11 \n" /* G */ \
"vadd.u16 q2, q0, q2 \n" /* R */ \
"vadd.u16 q0, q0, q9 \n" /* B */ \
"vqsub.u16 q1, q4, q8 \n" /* G */ \
"vqsub.u16 q0, q0, q10 \n" /* B */ \
"vqsub.u16 q2, q2, q12 \n" /* R */
// Convert from 2.14 fixed point RGB To 8 bit RGB
#define RGBTORGB8 \
"vqshrn.u16 d4, q2, #6 \n" /* R */ \
"vqshrn.u16 d2, q1, #6 \n" /* G */ \
"vqshrn.u16 d0, q0, #6 \n" /* B */
#else
#define YUVTORGB \
"vmull.u16 q2, d1, d31 \n" \
"vmull.u8 q8, d3, d29 \n" \
"vmull.u16 q0, d0, d31 \n" \
"vmlal.u8 q8, d2, d28 \n" /* DG */ \
"vqshrun.s32 d0, q0, #16 \n" \
"vqshrun.s32 d1, q2, #16 \n" /* Y */ \
"vmull.u8 q9, d2, d26 \n" /* DB */ \
"vmull.u8 q1, d3, d27 \n" /* DR */ \
"vqadd.s16 q2, q0, q12 \n" \
"vqadd.s16 q4, q0, q11 \n" \
"vqadd.s16 q0, q0, q10 \n" \
"vqadd.s16 q2, q2, q1 \n" /* R */ \
"vqsub.s16 q1, q4, q8 \n" /* G */ \
"vqadd.s16 q0, q0, q9 \n" /* B */
#define RGBTORGB8 \
"vqshrun.s16 d4, q2, #6 \n" /* R */ \
"vqshrun.s16 d2, q1, #6 \n" /* G */ \
"vqshrun.s16 d0, q0, #6 \n" /* B */
#endif
#define YUVTORGB_REGS \
"q0", "q1", "q2", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "d31"
#define STORERGBA \
"vmov.u8 d1, d0 \n" \
"vmov.u8 d3, d4 \n" \
"vmov.u8 d0, d6 \n" \
"vst4.8 {d0, d1, d2, d3}, [%[dst_rgba]]! \n"
void I444ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
@ -114,22 +162,20 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"vmov.u8 d6, #255 \n"
"1: \n" READYUV444 YUVTORGB
"subs %4, %4, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void I422ToARGBRow_NEON(const uint8_t* src_y,
@ -140,22 +186,20 @@ void I422ToARGBRow_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"vmov.u8 d6, #255 \n"
"1: \n" READYUV422 YUVTORGB
"subs %4, %4, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void I444AlphaToARGBRow_NEON(const uint8_t* src_y,
@ -168,22 +212,20 @@ void I444AlphaToARGBRow_NEON(const uint8_t* src_y,
asm volatile(
YUVTORGB_SETUP
"1: \n" READYUV444 YUVTORGB
"vld1.8 {d23}, [%3]! \n"
"subs %5, %5, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%4]! \n"
RGBTORGB8
"vld1.8 {d6}, [%[src_a]]! \n"
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(src_a), // %3
"+r"(dst_argb), // %4
"+r"(width) // %5
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[src_a] "+r"(src_a), // %[src_a]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
@ -196,22 +238,20 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
asm volatile(
YUVTORGB_SETUP
"1: \n" READYUV422 YUVTORGB
"subs %5, %5, #8 \n"
"vld1.8 {d23}, [%3]! \n"
"vst4.8 {d20, d21, d22, d23}, [%4]! \n"
RGBTORGB8
"vld1.8 {d6}, [%[src_a]]! \n"
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(src_a), // %3
"+r"(dst_argb), // %4
"+r"(width) // %5
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[src_a] "+r"(src_a), // %[src_a]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void I422ToRGBARow_NEON(const uint8_t* src_y,
@ -222,22 +262,18 @@ void I422ToRGBARow_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READYUV422 YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d19, #255 \n" // YUVTORGB modified d19
"vst4.8 {d19, d20, d21, d22}, [%3]! \n"
RGBTORGB8 "subs %[width], %[width], #8 \n" STORERGBA
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_rgba] "+r"(dst_rgba), // %[dst_rgba]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void I422ToRGB24Row_NEON(const uint8_t* src_y,
@ -248,29 +284,28 @@ void I422ToRGB24Row_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READYUV422 YUVTORGB
"subs %4, %4, #8 \n"
"vst3.8 {d20, d21, d22}, [%3]! \n"
RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS);
}
#define ARGBTORGB565 \
"vshll.u8 q0, d22, #8 \n" /* R */ \
"vshll.u8 q8, d21, #8 \n" /* G */ \
"vshll.u8 q9, d20, #8 \n" /* B */ \
"vsri.16 q0, q8, #5 \n" /* RG */ \
"vsri.16 q0, q9, #11 \n" /* RGB */
"vshll.u8 q2, d4, #8 \n" /* R */ \
"vshll.u8 q1, d2, #8 \n" /* G */ \
"vshll.u8 q0, d0, #8 \n" /* B */ \
"vsri.16 q2, q1, #5 \n" /* RG */ \
"vsri.16 q2, q0, #11 \n" /* RGB */
void I422ToRGB565Row_NEON(const uint8_t* src_y,
const uint8_t* src_u,
@ -280,31 +315,29 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READYUV422 YUVTORGB
"subs %4, %4, #8 \n" ARGBTORGB565
"vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565.
RGBTORGB8 "subs %[width], %[width], #8 \n" ARGBTORGB565
"vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_rgb565] "+r"(dst_rgb565), // %[dst_rgb565]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS);
}
#define ARGBTOARGB1555 \
"vshll.u8 q0, d23, #8 \n" /* A */ \
"vshll.u8 q8, d22, #8 \n" /* R */ \
"vshll.u8 q9, d21, #8 \n" /* G */ \
"vshll.u8 q10, d20, #8 \n" /* B */ \
"vsri.16 q0, q8, #1 \n" /* AR */ \
"vsri.16 q0, q9, #6 \n" /* ARG */ \
"vsri.16 q0, q10, #11 \n" /* ARGB */
"vshll.u8 q3, d6, #8 \n" /* A */ \
"vshll.u8 q2, d4, #8 \n" /* R */ \
"vshll.u8 q1, d2, #8 \n" /* G */ \
"vshll.u8 q0, d0, #8 \n" /* B */ \
"vsri.16 q3, q2, #1 \n" /* AR */ \
"vsri.16 q3, q1, #6 \n" /* ARG */ \
"vsri.16 q3, q0, #11 \n" /* ARGB */
void I422ToARGB1555Row_NEON(const uint8_t* src_y,
const uint8_t* src_u,
@ -315,30 +348,28 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
asm volatile(
YUVTORGB_SETUP
"1: \n" READYUV422 YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n" ARGBTOARGB1555
"vst1.8 {q0}, [%3]! \n" // store 8 pixels
RGBTORGB8
"subs %[width], %[width], #8 \n"
"vmov.u8 d6, #0xff \n" ARGBTOARGB1555
"vst1.8 {q3}, [%[dst_argb1555]]! \n" // store 8 pixels RGB1555.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_argb1555] "+r"(dst_argb1555), // %[dst_argb1555]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "q3");
}
#define ARGBTOARGB4444 \
"vshr.u8 d20, d20, #4 \n" /* B */ \
"vbic.32 d21, d21, d4 \n" /* G */ \
"vshr.u8 d22, d22, #4 \n" /* R */ \
"vbic.32 d23, d23, d4 \n" /* A */ \
"vorr d0, d20, d21 \n" /* BG */ \
"vorr d1, d22, d23 \n" /* RA */ \
"vshr.u8 d0, d0, #4 \n" /* B */ \
"vbic.32 d2, d2, d7 \n" /* G */ \
"vshr.u8 d4, d4, #4 \n" /* R */ \
"vbic.32 d6, d6, d7 \n" /* A */ \
"vorr d0, d0, d2 \n" /* BG */ \
"vorr d1, d4, d6 \n" /* RA */ \
"vzip.u8 d0, d1 \n" /* BGRA */
void I422ToARGB4444Row_NEON(const uint8_t* src_y,
@ -349,25 +380,21 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d4, #0x0f \n" // vbic bits to clear
"1: \n"
READYUV422 YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n" ARGBTOARGB4444
"vst1.8 {q0}, [%3]! \n" // store 8 pixels
"vmov.u8 d6, #255 \n"
"vmov.u8 d7, #0x0f \n" // vbic bits to clear
"1: \n" READYUV422 YUVTORGB
RGBTORGB8
"subs %[width], %[width], #8 \n" ARGBTOARGB4444
"vst1.8 {q0}, [%[dst_argb4444]]! \n" // store 8 pixels
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
[src_v] "+r"(src_v), // %[src_v]
[dst_argb4444] "+r"(dst_argb4444), // %[dst_argb4444]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "q3");
}
void I400ToARGBRow_NEON(const uint8_t* src_y,
@ -376,20 +403,18 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"vmov.u8 d6, #255 \n"
"1: \n" READYUV400 YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
@ -414,22 +439,20 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READNV12 YUVTORGB
"subs %3, %3, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READNV12 YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_uv] "+r"(src_uv), // %[src_uv]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void NV21ToARGBRow_NEON(const uint8_t* src_y,
@ -437,22 +460,20 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READNV21 YUVTORGB
"subs %3, %3, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READNV21 YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_vu] "+r"(src_vu), // %[src_vu]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void NV12ToRGB24Row_NEON(const uint8_t* src_y,
@ -461,25 +482,19 @@ void NV12ToRGB24Row_NEON(const uint8_t* src_y,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP
"1: \n"
READNV12 YUVTORGB
"subs %3, %3, #8 \n"
"vst3.8 {d20, d21, d22}, [%2]! \n"
"vmov.u8 d6, #255 \n"
"1: \n" READNV12 YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb24), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_uv] "+r"(src_uv), // %[src_uv]
[dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS);
}
void NV21ToRGB24Row_NEON(const uint8_t* src_y,
@ -488,25 +503,19 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP
"1: \n"
READNV21 YUVTORGB
"subs %3, %3, #8 \n"
"vst3.8 {d20, d21, d22}, [%2]! \n"
"vmov.u8 d6, #255 \n"
"1: \n" READNV21 YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_rgb24), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_vu] "+r"(src_vu), // %[src_vu]
[dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS);
}
void NV12ToRGB565Row_NEON(const uint8_t* src_y,
@ -516,62 +525,56 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y,
int width) {
asm volatile(
YUVTORGB_SETUP
"1: \n" READNV12 YUVTORGB
"subs %3, %3, #8 \n" ARGBTORGB565
"vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
"vmov.u8 d6, #255 \n"
"1: \n" READNV12 YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n" ARGBTORGB565
"vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
: [src_y] "+r"(src_y), // %[src_y]
[src_uv] "+r"(src_uv), // %[src_uv]
[dst_rgb565] "+r"(dst_rgb565), // %[dst_rgb565]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS);
}
void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READYUY2 YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READYUY2 YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15");
: [src_yuy2] "+r"(src_yuy2), // %[src_yuy2]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" READUYVY YUVTORGB
"subs %2, %2, #8 \n"
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"
asm volatile(
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" READUYVY YUVTORGB RGBTORGB8
"subs %[width], %[width], #8 \n"
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15");
: [src_uyvy] "+r"(src_uyvy), // %[src_uyvy]
[dst_argb] "+r"(dst_argb), // %[dst_argb]
[width] "+r"(width) // %[width]
: [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
[kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
: "cc", "memory", YUVTORGB_REGS, "d6");
}
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
@ -1548,16 +1551,16 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
int width) {
asm volatile(
"1: \n"
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
ARGBTORGB565
"vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565.
"vst1.8 {q2}, [%1]! \n" // store 8 pixels RGB565.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_rgb565), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q8", "q9", "q10", "q11");
: "cc", "memory", "q0", "q1", "q2", "d6");
}
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
@ -1565,21 +1568,21 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
const uint32_t dither4,
int width) {
asm volatile(
"vdup.32 d2, %2 \n" // dither4
"vdup.32 d7, %2 \n" // dither4
"1: \n"
"vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB.
"vld4.8 {d0, d2, d4, d6}, [%1]! \n" // load 8 pixels of ARGB.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vqadd.u8 d20, d20, d2 \n"
"vqadd.u8 d21, d21, d2 \n"
"vqadd.u8 d22, d22, d2 \n" // add for dither
"vqadd.u8 d0, d0, d7 \n"
"vqadd.u8 d2, d2, d7 \n"
"vqadd.u8 d4, d4, d7 \n" // add for dither
ARGBTORGB565
"vst1.8 {q0}, [%0]! \n" // store 8 RGB565.
"vst1.8 {q2}, [%0]! \n" // store 8 RGB565.
"bgt 1b \n"
: "+r"(dst_rgb) // %0
: "r"(src_argb), // %1
"r"(dither4), // %2
"r"(width) // %3
: "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11");
: "cc", "memory", "q0", "q1", "q2", "q3");
}
void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
@ -1587,26 +1590,26 @@ void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
int width) {
asm volatile(
"1: \n"
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
ARGBTOARGB1555
"vst1.8 {q0}, [%1]! \n" // store 8 ARGB1555.
"vst1.8 {q3}, [%1]! \n" // store 8 ARGB1555.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb1555), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q8", "q9", "q10", "q11");
: "cc", "memory", "q0", "q1", "q2", "q3");
}
void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_argb4444,
int width) {
asm volatile(
"vmov.u8 d4, #0x0f \n" // bits to clear with
"vmov.u8 d7, #0x0f \n" // bits to clear with
// vbic.
"1: \n"
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB.
"subs %2, %2, #8 \n" // 8 processed per loop.
ARGBTOARGB4444
"vst1.8 {q0}, [%1]! \n" // store 8 ARGB4444.
@ -1615,7 +1618,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
"+r"(dst_argb4444), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q8", "q9", "q10", "q11");
: "cc", "memory", "q0", "q1", "q2", "q3");
}
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {

File diff suppressed because it is too large Load Diff

View File

@ -32,7 +32,11 @@ namespace libyuv {
#endif
#define ERROR_R 1
#define ERROR_G 1
#define ERROR_B 3
#ifdef LIBYUV_UNLIMITED_DATA
#define ERROR_B 1
#else
#define ERROR_B 18
#endif
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \
TEST_F(LibYUVColorTest, TESTNAME) { \
@ -498,7 +502,11 @@ TEST_F(LibYUVColorTest, TestYUV) {
YUVToRGB(240, 0, 0, &r1, &g1, &b1);
EXPECT_EQ(57, r1);
EXPECT_EQ(255, g1);
#ifdef LIBYUV_UNLIMITED_DATA
EXPECT_EQ(3, b1);
#else
EXPECT_EQ(5, b1);
#endif
for (int i = 0; i < 256; ++i) {
YUVToRGBReference(i, 128, 128, &r0, &g0, &b0);
@ -655,9 +663,9 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) {
int y = RANDOM256(y2);
YUVJToRGBReference(y, u, v, &r0, &g0, &b0);
YUVJToRGB(y, u, v, &r1, &g1, &b1);
EXPECT_NEAR(r0, r1, 1);
EXPECT_NEAR(g0, g1, 1);
EXPECT_NEAR(b0, b1, 1);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -687,8 +695,7 @@ TEST_F(LibYUVColorTest, TestFullYUVH) {
YUVHToRGB(y, u, v, &r1, &g1, &b1);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
// TODO(crbug.com/libyuv/862): Reduce the errors in the B channel.
EXPECT_NEAR(b0, b1, 15);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -716,9 +723,9 @@ TEST_F(LibYUVColorTest, TestFullYUVF) {
int y = RANDOM256(y2);
YUVFToRGBReference(y, u, v, &r0, &g0, &b0);
YUVFToRGB(y, u, v, &r1, &g1, &b1);
EXPECT_NEAR(r0, r1, 5);
EXPECT_NEAR(g0, g1, 5);
EXPECT_NEAR(b0, b1, 5);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -748,8 +755,7 @@ TEST_F(LibYUVColorTest, TestFullYUVU) {
YUVUToRGB(y, u, v, &r1, &g1, &b1);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
// TODO(crbug.com/libyuv/863): Reduce the errors in the B channel.
EXPECT_NEAR(b0, b1, 18);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];