diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index b547e8194..ded1995fd 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -557,6 +557,8 @@ extern "C" {
 #define HAS_ABGRTOAR30ROW_NEON
 #define HAS_I210ALPHATOARGBROW_NEON
 #define HAS_I410ALPHATOARGBROW_NEON
+#define HAS_I210TOARGBROW_NEON
+#define HAS_I410TOARGBROW_NEON
 
 #define HAS_ABGRTOYJROW_NEON_DOTPROD
 #define HAS_ABGRTOYROW_NEON_DOTPROD
@@ -1075,6 +1077,18 @@ void I444ToRGB24Row_NEON(const uint8_t* src_y,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width);
+void I210ToARGBRow_NEON(const uint16_t* src_y,
+                        const uint16_t* src_u,
+                        const uint16_t* src_v,
+                        uint8_t* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
+void I410ToARGBRow_NEON(const uint16_t* src_y,
+                        const uint16_t* src_u,
+                        const uint16_t* src_v,
+                        uint8_t* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
 void I422ToARGBRow_NEON(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
@@ -5074,6 +5088,18 @@ void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
+void I210ToARGBRow_Any_NEON(const uint16_t* y_buf,
+                            const uint16_t* u_buf,
+                            const uint16_t* v_buf,
+                            uint8_t* dst_ptr,
+                            const struct YuvConstants* yuvconstants,
+                            int width);
+void I410ToARGBRow_Any_NEON(const uint16_t* y_buf,
+                            const uint16_t* u_buf,
+                            const uint16_t* v_buf,
+                            uint8_t* dst_ptr,
+                            const struct YuvConstants* yuvconstants,
+                            int width);
 void I444AlphaToARGBRow_Any_NEON(const uint8_t* y_buf,
                                  const uint8_t* u_buf,
                                  const uint8_t* v_buf,
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index c56d08c99..291bab63c 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -1389,6 +1389,14 @@ int I010ToARGBMatrix(const uint16_t* src_y,
     }
   }
 #endif
+#if defined(HAS_I210TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I210ToARGBRow = I210ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I210ToARGBRow = I210ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I210TOARGBROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     I210ToARGBRow = I210ToARGBRow_Any_AVX2;
@@ -1609,6 +1617,14 @@ int I210ToARGBMatrix(const uint16_t* src_y,
     }
   }
 #endif
+#if defined(HAS_I210TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I210ToARGBRow = I210ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I210ToARGBRow = I210ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I210TOARGBROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     I210ToARGBRow = I210ToARGBRow_Any_AVX2;
@@ -1770,6 +1786,14 @@ int I410ToARGBMatrix(const uint16_t* src_y,
     }
   }
 #endif
+#if defined(HAS_I410TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I410ToARGBRow = I410ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I410ToARGBRow = I410ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I410TOARGBROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     I410ToARGBRow = I410ToARGBRow_Any_AVX2;
@@ -6874,6 +6898,14 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
     }
   }
 #endif
+#if defined(HAS_I410TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I410ToARGBRow = I410ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I410ToARGBRow = I410ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I410TOARGBROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     I410ToARGBRow = I410ToARGBRow_Any_AVX2;
@@ -6979,6 +7011,14 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
     }
   }
 #endif
+#if defined(HAS_I410TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I410ToARGBRow = I410ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I410ToARGBRow = I410ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I410TOARGBROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     I410ToARGBRow = I410ToARGBRow_Any_AVX2;
diff --git a/source/row_any.cc b/source/row_any.cc
index af48ebbd1..57414f3f7 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -516,6 +516,12 @@ ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
 #ifdef HAS_I212TOAR30ROW_AVX2
 ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
 #endif
+#ifdef HAS_I210TOARGBROW_NEON
+ANY31CT(I210ToARGBRow_Any_NEON, I210ToARGBRow_NEON, 1, 0, uint16_t, 2, 4, 7)
+#endif
+#ifdef HAS_I410TOARGBROW_NEON
+ANY31CT(I410ToARGBRow_Any_NEON, I410ToARGBRow_NEON, 0, 0, uint16_t, 2, 4, 7)
+#endif
 #undef ANY31CT
 
 // Any 3 planes to 1 plane with parameter
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index ff2472e1a..bc5aeb07c 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -233,6 +233,64 @@ void I444ToRGB24Row_NEON(const uint8_t* src_y,
       : "cc", "memory", YUVTORGB_REGS);
 }
 
+// Converts one row from three 16-bit planes (src_y, src_u, src_v) to 8-bit
+// output at dst_argb. Every loop iteration subtracts 8 from width and stores
+// 32 bytes, with v19 (constant 255) as the fourth interleaved channel, so
+// width must be a positive multiple of 8.
+void I210ToARGBRow_NEON(const uint16_t* src_y,
+                        const uint16_t* src_u,
+                        const uint16_t* src_v,
+                        uint8_t* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int width) {
+  const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
+  const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
+  asm volatile(
+      YUVTORGB_SETUP
+      "movi v19.8b, #255 \n"
+      "1: \n" READYUV210 NVTORGB RGBTORGB8
+      "subs %w[width], %w[width], #8 \n"
+      "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
+      "b.gt 1b \n"
+      : [src_y] "+r"(src_y),            // %[src_y]
+        [src_u] "+r"(src_u),            // %[src_u]
+        [src_v] "+r"(src_v),            // %[src_v]
+        [dst_argb] "+r"(dst_argb),      // %[dst_argb]
+        [width] "+r"(width)             // %[width]
+      : [kUVCoeff] "r"(uv_coeff),       // %[kUVCoeff]
+        [kRGBCoeffBias] "r"(rgb_coeff)  // %[kRGBCoeffBias]
+      : "cc", "memory", YUVTORGB_REGS, "v19");
+}
+
+// Converts one row from three 16-bit planes to 8-bit output at dst_argb,
+// loading samples with READYUV410 (I210ToARGBRow_NEON uses READYUV210).
+// Every loop iteration subtracts 8 from width and stores 32 bytes, v19
+// (constant 255) being the fourth channel; width must be a multiple of 8.
+void I410ToARGBRow_NEON(const uint16_t* src_y,
+                        const uint16_t* src_u,
+                        const uint16_t* src_v,
+                        uint8_t* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int width) {
+  const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
+  const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
+  asm volatile(
+      YUVTORGB_SETUP
+      "movi v19.8b, #255 \n"
+      "1: \n" READYUV410 NVTORGB RGBTORGB8
+      "subs %w[width], %w[width], #8 \n"
+      "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
+      "b.gt 1b \n"
+      : [src_y] "+r"(src_y),            // %[src_y]
+        [src_u] "+r"(src_u),            // %[src_u]
+        [src_v] "+r"(src_v),            // %[src_v]
+        [dst_argb] "+r"(dst_argb),      // %[dst_argb]
+        [width] "+r"(width)             // %[width]
+      : [kUVCoeff] "r"(uv_coeff),       // %[kUVCoeff]
+        [kRGBCoeffBias] "r"(rgb_coeff)  // %[kRGBCoeffBias]
+      : "cc", "memory", YUVTORGB_REGS, "v19");
+}
+
 void I422ToARGBRow_NEON(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,