diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 66c4a649d..502184e38 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -108,8 +108,6 @@ extern "C" {
 #define HAS_I422TOARGB1555ROW_SSSE3
 #define HAS_I422TOARGB4444ROW_SSSE3
 #define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOARGBMATRIXROW_SSSE3
-#define HAS_I422TOABGRMATRIXROW_SSSE3
 #define HAS_I422TOBGRAROW_SSSE3
 #define HAS_I422TORAWROW_SSSE3
 #define HAS_I422TORGB24ROW_SSSE3
@@ -151,8 +149,6 @@ extern "C" {
 #define HAS_YUY2TOYROW_SSE2
 #define HAS_I444TOARGBROW_SSSE3
 #define HAS_I444TOABGRROW_SSSE3
-#define HAS_I444TOARGBMATRIXROW_SSSE3
-#define HAS_I444TOABGRMATRIXROW_SSSE3

 // Effects:
 #define HAS_ARGBADDROW_SSE2
@@ -191,8 +187,6 @@ extern "C" {
     (!defined(__clang__) || defined(__SSSE3__))
 #define HAS_I422TOARGBROW_SSSE3
 #define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGBMATRIXROW_SSSE3
-#define HAS_I422TOABGRMATRIXROW_SSSE3
 #endif

 // The following are available for AVX2 Visual C and clangcl 32 bit:
@@ -218,8 +212,6 @@ extern "C" {
 #define HAS_NV21TOARGBROW_AVX2
 #define HAS_NV21TORGB565ROW_AVX2
 #define HAS_RGB565TOARGBROW_AVX2
-#define HAS_I444TOARGBMATRIXROW_AVX2
-#define HAS_I444TOABGRMATRIXROW_AVX2
 #endif

 // The following are available on all x86 platforms, but
@@ -239,8 +231,6 @@ extern "C" {
 #define HAS_I400TOARGBROW_AVX2
 #define HAS_I422TOABGRROW_AVX2
 #define HAS_I422TOARGBROW_AVX2
-#define HAS_I422TOARGBMATRIXROW_AVX2
-#define HAS_I422TOABGRMATRIXROW_AVX2
 #define HAS_I422TOBGRAROW_AVX2
 #define HAS_I422TORAWROW_AVX2
 #define HAS_I422TORGB24ROW_AVX2
@@ -313,8 +303,6 @@ extern "C" {
 #define HAS_I422TOARGB4444ROW_NEON
 // TODO(fbarchard): Implement aarch64 neon version
 #ifndef __aarch64__
-#define HAS_I422TOARGBMATRIXROW_NEON
-#define HAS_I422TOABGRMATRIXROW_NEON
 #define HAS_J422TOARGBROW_NEON
 #define HAS_J422TOABGRROW_NEON
 #define HAS_H422TOARGBROW_NEON
@@ -444,8 +432,19 @@ typedef uint32 ulvec32[8];
 typedef uint8 ulvec8[32];
 #endif

+#if defined(__arm__) || defined(__aarch64__)
+
+// This struct is for Arm color conversion.
+struct YuvConstants {
+  uvec8 kUVToRB;
+  uvec8 kUVToG;
+  vec16 kUVBiasBGR;
+  vec32 kYToRgb;
+};
+
+#else
+
 // This struct is for Intel color conversion.
-// TODO(fnbarchard): Consider different struct for other platforms.
struct YuvConstants { lvec8 kUVToB; lvec8 kUVToG; @@ -464,21 +463,12 @@ struct YuvConstants { #define KUVBIASG 128 #define KUVBIASR 160 #define KYTORGB 192 - -struct YuvConstantsNEON { - uvec8 kUVToRB; - uvec8 kUVToG; - vec16 kUVBiasBGR; - vec32 kYToRgb; -}; +#endif extern struct YuvConstants kYuvConstants; extern struct YuvConstants kYvuConstants; extern struct YuvConstants kYuvJConstants; extern struct YuvConstants kYuvHConstants; -extern struct YuvConstantsNEON kYuvConstantsNEON; -extern struct YuvConstantsNEON kYuvJConstantsNEON; -extern struct YuvConstantsNEON kYuvHConstantsNEON; #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP @@ -569,110 +559,97 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstantsNEON* YuvConstants, - int width); -void I422ToABGRMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstantsNEON* YuvConstants, - int width); void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width); -void J422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void H422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); void NV12ToARGBRow_NEON(const uint8* src_y, 
const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, - int width); -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_NEON(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_NEON(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); @@ -1054,571 +1031,526 @@ void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToABGRMatrixRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); void I411ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV21ToRGB565Row_C(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_C(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct 
YuvConstants* yuvconstants, int width); void I422ToRGB24Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToABGRMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToBGRARow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGBARow_AVX2(const uint8* src_y, +void I422ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToBGRARow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I444ToARGBMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I444ToARGBMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); void I444ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I444ToABGRMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width); -void I444ToABGRMatrixRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width); void I444ToABGRRow_SSSE3(const uint8* src_y, const 
uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I444ToABGRRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width); +void I444ToABGRRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width); +void I444ToABGRRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width); +void I422ToABGRRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void I422ToARGBMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); -void I422ToABGRMatrixRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width); void I411ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_SSSE3(const 
uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I444ToABGRRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); 
void I444ToABGRRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_AVX2(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_AVX2(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_argb, - int width); -void J422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void H422ToABGRRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_abgr, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + 
struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_Any_SSSE3(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_Any_AVX2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); @@ -1716,128 +1648,121 @@ void I444ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I411ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRAWRow_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); -void J422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void 
H422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void J422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void H422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); void NV12ToARGBRow_Any_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width); void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); diff --git a/source/convert_argb.cc b/source/convert_argb.cc index dec44dedd..dc3071926 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -56,6 +56,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || @@ -103,7 +104,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I444ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I444ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -124,6 +125,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I444ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || @@ -171,7 +173,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I444ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; @@ -192,6 +194,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if 
(!src_y || !src_u || !src_v || !dst_argb || @@ -248,7 +251,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -269,6 +272,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I411ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || @@ -316,7 +320,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I411ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -338,6 +342,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = ARGBCopyYToAlphaRow_C; @@ -436,7 +441,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); ARGBCopyYToAlphaRow(src_a, dst_argb, width); ARGBAttenuateRow(dst_argb, dst_argb, width); dst_argb += dst_stride_argb; @@ -462,6 +467,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToABGRRow_C; void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = ARGBCopyYToAlphaRow_C; @@ -560,7 +566,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); ARGBCopyYToAlphaRow(src_a, dst_abgr, width); ARGBAttenuateRow(dst_abgr, dst_abgr, width); dst_abgr += dst_stride_abgr; @@ -639,7 +645,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) = + void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) = J400ToARGBRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { @@ -766,7 +772,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = RGB24ToARGBRow_C; if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { @@ -816,7 +822,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = RAWToARGBRow_C; if (!src_raw || !dst_argb || width <= 0 || height == 0) { @@ -866,7 +872,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*RGB565ToARGBRow)(const uint8* src_rgb565, 
uint8* dst_argb, int pix) = + void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) = RGB565ToARGBRow_C; if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { @@ -925,7 +931,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, int width, int height) { int y; void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, - int pix) = ARGB1555ToARGBRow_C; + int width) = ARGB1555ToARGBRow_C; if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -983,7 +989,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, int width, int height) { int y; void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, - int pix) = ARGB4444ToARGBRow_C; + int width) = ARGB4444ToARGBRow_C; if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1044,6 +1050,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { @@ -1081,7 +1088,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - NV12ToARGBRow(src_y, src_uv, dst_argb, width); + NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1098,10 +1105,11 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*NV21ToARGBRow)(const uint8* y_buf, + void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, - int width) = NV21ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = NV12ToARGBRow_C; if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1112,33 +1120,33 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_NV21TOARGBROW_SSSE3) +#if defined(HAS_NV12TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3; + NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_SSSE3; + NV12ToARGBRow = NV12ToARGBRow_SSSE3; } } #endif -#if defined(HAS_NV21TOARGBROW_AVX2) +#if defined(HAS_NV12TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - NV21ToARGBRow = NV21ToARGBRow_Any_AVX2; + NV12ToARGBRow = NV12ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - NV21ToARGBRow = NV21ToARGBRow_AVX2; + NV12ToARGBRow = NV12ToARGBRow_AVX2; } } #endif -#if defined(HAS_NV21TOARGBROW_NEON) +#if defined(HAS_NV12TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - NV21ToARGBRow = NV21ToARGBRow_Any_NEON; + NV12ToARGBRow = NV12ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_NEON; + NV12ToARGBRow = NV12ToARGBRow_NEON; } } #endif for (y = 0; y < height; ++y) { - NV21ToARGBRow(src_y, src_uv, dst_argb, width); + NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1157,6 +1165,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420, void (*NV12ToARGBRow)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_m420 || !dst_argb || width <= 0 || height == 0) { @@ -1194,14 +1203,16 @@ int M420ToARGB(const uint8* src_m420, int 
src_stride_m420, #endif for (y = 0; y < height - 1; y += 2) { - NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); + NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, + &kYuvConstants, width); NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2, - dst_argb + dst_stride_argb, width); + dst_argb + dst_stride_argb, &kYuvConstants, width); dst_argb += dst_stride_argb * 2; src_m420 += src_stride_m420 * 3; } if (height & 1) { - NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); + NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, + &kYuvConstants, width); } return 0; } @@ -1212,7 +1223,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) = + void (*YUY2ToARGBRow)(const uint8* src_yuy2, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) = YUY2ToARGBRow_C; if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) { @@ -1256,7 +1270,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, } #endif for (y = 0; y < height; ++y) { - YUY2ToARGBRow(src_yuy2, dst_argb, width); + YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvConstants, width); src_yuy2 += src_stride_yuy2; dst_argb += dst_stride_argb; } @@ -1269,7 +1283,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) = + void (*UYVYToARGBRow)(const uint8* src_uyvy, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) = UYVYToARGBRow_C; if (!src_uyvy || !dst_argb || width <= 0 || height == 0) { @@ -1313,7 +1330,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, } #endif for (y = 0; y < height; ++y) { - UYVYToARGBRow(src_uyvy, dst_argb, width); + UYVYToARGBRow(src_uyvy, dst_argb, &kYuvConstants, width); src_uyvy += src_stride_uyvy; dst_argb += dst_stride_argb; } @@ -1328,11 +1345,12 @@ int J420ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1343,42 +1361,42 @@ int J420ToARGB(const uint8* src_y, int src_stride_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_J422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToARGBRow = J422ToARGBRow_Any_SSSE3; + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_J422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToARGBRow = J422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToARGBRow = J422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_J422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToARGBRow = J422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if 
(IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_J422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1397,11 +1415,12 @@ int J422ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*J422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -1422,42 +1441,42 @@ int J422ToARGB(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; } -#if defined(HAS_J422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToARGBRow = J422ToARGBRow_Any_SSSE3; + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_J422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToARGBRow = J422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToARGBRow = J422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_J422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToARGBRow = J422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - J422ToARGBRow = J422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_J422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -1474,11 +1493,12 @@ int J420ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*J422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { return -1; @@ -1489,42 
+1509,42 @@ int J420ToABGR(const uint8* src_y, int src_stride_y, dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; dst_stride_abgr = -dst_stride_abgr; } -#if defined(HAS_J422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToABGRRow = J422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_J422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToABGRRow = J422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToABGRRow = J422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_J422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToABGRRow = J422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_J422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; if (y & 1) { @@ -1543,11 +1563,12 @@ int J422ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*J422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = J422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { @@ -1568,42 +1589,42 @@ int J422ToABGR(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; } -#if defined(HAS_J422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - J422ToABGRRow = J422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_J422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - J422ToABGRRow = J422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - J422ToABGRRow = J422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_J422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - J422ToABGRRow = J422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - J422ToABGRRow = J422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_J422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && 
IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; @@ -1620,11 +1641,12 @@ int H420ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*H422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1635,42 +1657,42 @@ int H420ToARGB(const uint8* src_y, int src_stride_y, dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } -#if defined(HAS_H422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToARGBRow = H422ToARGBRow_Any_SSSE3; + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_H422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToARGBRow = H422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToARGBRow = H422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_H422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToARGBRow = H422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_H422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1689,11 +1711,12 @@ int H422ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height) { int y; - void (*H422ToARGBRow)(const uint8* y_buf, + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToARGBRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -1714,42 +1737,42 @@ int H422ToARGB(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; } -#if defined(HAS_H422TOARGBROW_SSSE3) +#if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToARGBRow = H422ToARGBRow_Any_SSSE3; + I422ToARGBRow = 
I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_SSSE3; + I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif -#if defined(HAS_H422TOARGBROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToARGBRow = H422ToARGBRow_Any_AVX2; + I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToARGBRow = H422ToARGBRow_AVX2; + I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif -#if defined(HAS_H422TOARGBROW_NEON) +#if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToARGBRow = H422ToARGBRow_Any_NEON; + I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToARGBRow = H422ToARGBRow_NEON; + I422ToARGBRow = I422ToARGBRow_NEON; } } #endif -#if defined(HAS_H422TOARGBROW_MIPS_DSPR2) +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2; + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; @@ -1766,11 +1789,12 @@ int H420ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*H422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { return -1; @@ -1781,42 +1805,42 @@ int H420ToABGR(const uint8* src_y, int src_stride_y, dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; dst_stride_abgr = -dst_stride_abgr; } -#if defined(HAS_H422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToABGRRow = H422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_H422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToABGRRow = H422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToABGRRow = H422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_H422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToABGRRow = H422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_H422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToABGRRow(src_y, 
src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; if (y & 1) { @@ -1835,11 +1859,12 @@ int H422ToABGR(const uint8* src_y, int src_stride_y, uint8* dst_abgr, int dst_stride_abgr, int width, int height) { int y; - void (*H422ToABGRRow)(const uint8* y_buf, + void (*I422ToABGRRow)(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, - int width) = H422ToABGRRow_C; + struct YuvConstants* yuvconstants, + int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { @@ -1860,42 +1885,42 @@ int H422ToABGR(const uint8* src_y, int src_stride_y, height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; } -#if defined(HAS_H422TOABGRROW_SSSE3) +#if defined(HAS_I422TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - H422ToABGRRow = H422ToABGRRow_Any_SSSE3; + I422ToABGRRow = I422ToABGRRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_SSSE3; + I422ToABGRRow = I422ToABGRRow_SSSE3; } } #endif -#if defined(HAS_H422TOABGRROW_AVX2) +#if defined(HAS_I422TOABGRROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - H422ToABGRRow = H422ToABGRRow_Any_AVX2; + I422ToABGRRow = I422ToABGRRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - H422ToABGRRow = H422ToABGRRow_AVX2; + I422ToABGRRow = I422ToABGRRow_AVX2; } } #endif -#if defined(HAS_H422TOABGRROW_NEON) +#if defined(HAS_I422TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - H422ToABGRRow = H422ToABGRRow_Any_NEON; + I422ToABGRRow = I422ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 8)) { - H422ToABGRRow = H422ToABGRRow_NEON; + I422ToABGRRow = I422ToABGRRow_NEON; } } #endif -#if defined(HAS_H422TOABGRROW_MIPS_DSPR2) +#if defined(HAS_I422TOABGRROW_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) { - H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2; + I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2; } #endif for (y = 0; y < height; ++y) { - H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; diff --git a/source/convert_from.cc b/source/convert_from.cc index 31f1ac992..b9c7be904 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -462,6 +462,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { @@ -508,7 +509,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); + I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -531,6 +532,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToBGRARow_C; if (!src_y || !src_u || !src_v || !dst_bgra || width <= 0 || height == 0) { @@ -577,7 +579,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, 
src_u, src_v, dst_bgra, width); + I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width); dst_bgra += dst_stride_bgra; src_y += src_stride_y; if (y & 1) { @@ -600,6 +602,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || width <= 0 || height == 0) { @@ -637,7 +640,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; if (y & 1) { @@ -660,6 +663,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { @@ -697,7 +701,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); + I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; if (y & 1) { @@ -720,6 +724,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { @@ -757,7 +762,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width); + I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvConstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; if (y & 1) { @@ -780,6 +785,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRAWRow_C; if (!src_y || !src_u || !src_v || !dst_raw || width <= 0 || height == 0) { @@ -817,7 +823,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRAWRow(src_y, src_u, src_v, dst_raw, width); + I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvConstants, width); dst_raw += dst_stride_raw; src_y += src_stride_y; if (y & 1) { @@ -840,6 +846,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGB1555Row_C; if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 || height == 0) { @@ -877,7 +884,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width); + I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvConstants, width); dst_argb1555 += dst_stride_argb1555; src_y += src_stride_y; if (y & 1) { @@ -901,6 +908,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGB4444Row_C; if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 || height == 0) { @@ -938,7 +946,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width); + 
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvConstants, width); dst_argb4444 += dst_stride_argb4444; src_y += src_stride_y; if (y & 1) { @@ -961,6 +969,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { @@ -998,7 +1007,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width); + I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvConstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1029,6 +1038,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C; @@ -1105,7 +1115,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, // Allocate a row of argb. align_buffer_64(row_argb, width * 4); for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, row_argb, width); + I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvConstants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, *(uint32*)(dither4x4 + ((y & 3) << 2)), width); dst_rgb565 += dst_stride_rgb565; diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 3cca5f48b..2299ab892 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -287,9 +287,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, int width, int height) { int y; void (*YUY2ToUV422Row)(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) = + uint8* dst_u, uint8* dst_v, int width) = YUY2ToUV422Row_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = + void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) = YUY2ToYRow_C; // Negative height means invert the image. if (height < 0) { @@ -359,10 +359,10 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, int width, int height) { int y; void (*UYVYToUV422Row)(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) = + uint8* dst_u, uint8* dst_v, int width) = UYVYToUV422Row_C; void (*UYVYToYRow)(const uint8* src_uyvy, - uint8* dst_y, int pix) = UYVYToYRow_C; + uint8* dst_y, int width) = UYVYToYRow_C; // Negative height means invert the image. 
if (height < 0) { height = -height; @@ -790,6 +790,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToBGRARow_C; if (!src_y || !src_u || !src_v || !dst_bgra || @@ -846,7 +847,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); + I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width); dst_bgra += dst_stride_bgra; src_y += src_stride_y; src_u += src_stride_u; @@ -867,6 +868,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToABGRRow_C; if (!src_y || !src_u || !src_v || !dst_abgr || @@ -914,7 +916,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width); dst_abgr += dst_stride_abgr; src_y += src_stride_y; src_u += src_stride_u; @@ -935,6 +937,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || @@ -982,7 +985,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); + I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; src_u += src_stride_u; @@ -1001,6 +1004,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, void (*NV12ToRGB565Row)(const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { @@ -1038,7 +1042,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, #endif for (y = 0; y < height; ++y) { - NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width); + NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvConstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1055,10 +1059,11 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; - void (*NV21ToRGB565Row)(const uint8* y_buf, + void (*NV12ToRGB565Row)(const uint8* y_buf, const uint8* src_vu, uint8* rgb_buf, - int width) = NV21ToRGB565Row_C; + struct YuvConstants* yuvconstants, + int width) = NV12ToRGB565Row_C; if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1069,33 +1074,33 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } -#if defined(HAS_NV21TORGB565ROW_SSSE3) +#if defined(HAS_NV12TORGB565ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3; + NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - NV21ToRGB565Row = NV21ToRGB565Row_SSSE3; + NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; } } #endif -#if defined(HAS_NV21TORGB565ROW_AVX2) +#if defined(HAS_NV12TORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2; + NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { - NV21ToRGB565Row = 
NV21ToRGB565Row_AVX2; + NV12ToRGB565Row = NV12ToRGB565Row_AVX2; } } #endif -#if defined(HAS_NV21TORGB565ROW_NEON) +#if defined(HAS_NV12TORGB565ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON; + NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; if (IS_ALIGNED(width, 8)) { - NV21ToRGB565Row = NV21ToRGB565Row_NEON; + NV12ToRGB565Row = NV12ToRGB565Row_NEON; } } #endif for (y = 0; y < height; ++y) { - NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width); + NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -1110,7 +1115,7 @@ void SetPlane(uint8* dst_y, int dst_stride_y, int width, int height, uint32 value) { int y; - void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C; + void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C; if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -1186,7 +1191,7 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, int width, int height, uint32 value) { int y; - void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C; + void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { @@ -1909,7 +1914,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, const uint8* shuffler, int width, int height) { int y; void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, - const uint8* shuffler, int pix) = ARGBShuffleRow_C; + const uint8* shuffler, int width) = ARGBShuffleRow_C; if (!src_bgra || !dst_argb || width <= 0 || height == 0) { return -1; @@ -1976,7 +1981,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, const uint8* src_sobely, uint8* dst, int width)) { int y; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) = + void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) = ARGBToYJRow_C; void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width) = SobelYRow_C; @@ -2360,8 +2365,8 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = - SplitUVRow_C; + void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width) = SplitUVRow_C; void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -2464,8 +2469,8 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, int width, int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = - SplitUVRow_C; + void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width) = SplitUVRow_C; void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; diff --git a/source/row_any.cc b/source/row_any.cc index c309499ca..46cbdc759 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -40,103 +40,9 @@ extern "C" { memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ SS(r, DUVSHIFT) * BPP); \ } - -#ifdef HAS_I422TOARGBROW_SSSE3 -ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) -ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) -ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7) 
-ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7) -ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7) -ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7) -#endif -#ifdef HAS_I444TOARGBROW_SSSE3 -ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) -ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) -ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) -ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) -ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) -ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) -ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) -ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) -ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) +#ifdef HAS_I422TOYUY2ROW_SSE2 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) -#endif // HAS_I444TOARGBROW_SSSE3 -#ifdef HAS_I444TOABGRROW_SSSE3 -ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7) -#endif -#ifdef HAS_I422TORGB24ROW_AVX2 -ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) -#endif -#ifdef HAS_I422TORAWROW_AVX2 -ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) -#endif -#ifdef HAS_J422TOARGBROW_AVX2 -ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_J422TOABGRROW_AVX2 -ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_H422TOARGBROW_AVX2 -ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_H422TOABGRROW_AVX2 -ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TOARGBROW_AVX2 -ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TOBGRAROW_AVX2 -ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TORGBAROW_AVX2 -ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I422TOABGRROW_AVX2 -ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) -#endif -#ifdef HAS_I444TOARGBROW_AVX2 -ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) -#endif -#ifdef HAS_I444TOABGRROW_AVX2 -ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15) -#endif -#ifdef HAS_I411TOARGBROW_AVX2 -ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) -#endif -#ifdef HAS_I422TOARGB4444ROW_AVX2 -ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) -#endif -#ifdef HAS_I422TOARGB1555ROW_AVX2 -ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) -#endif -#ifdef HAS_I422TORGB565ROW_AVX2 -ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) -#endif -#ifdef HAS_I422TOARGBROW_NEON -ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) -ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) -ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) -ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) -ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) -ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) -ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) -ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) -ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) -ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) -ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 
7) -#endif -#ifdef HAS_J422TOARGBROW_NEON -ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7) -#endif -#ifdef HAS_J422TOABGRROW_NEON -ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7) -#endif -#ifdef HAS_H422TOARGBROW_NEON -ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7) -#endif -#ifdef HAS_H422TOABGRROW_NEON -ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7) #endif #ifdef HAS_I422TOYUY2ROW_NEON ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) @@ -144,7 +50,97 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #ifdef HAS_I422TOUYVYROW_NEON ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #endif -#undef ANY31 +#undef ANY31C + +// Any 3 planes to 1 with yuvconstants +#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ + uint8* dst_ptr, struct YuvConstants* yuvconstants, \ + int width) { \ + SIMD_ALIGNED(uint8 temp[64 * 4]); \ + memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \ + yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ + SS(r, DUVSHIFT) * BPP); \ + } + +#ifdef HAS_I422TOARGBROW_SSSE3 +ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) +ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7) +#endif +#ifdef HAS_I444TOARGBROW_SSSE3 +ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) +ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) +ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7) +ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) +ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) +ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) +ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) +ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) +ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7) +#endif // HAS_I444TOARGBROW_SSSE3 +#ifdef HAS_I444TOABGRROW_SSSE3 +ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7) +#endif +#ifdef HAS_I422TORGB24ROW_AVX2 +ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) +#endif +#ifdef HAS_I422TORAWROW_AVX2 +ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15) +#endif +#ifdef HAS_I422TOARGBROW_AVX2 +ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TOBGRAROW_AVX2 +ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TORGBAROW_AVX2 +ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I422TOABGRROW_AVX2 +ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15) +#endif +#ifdef HAS_I444TOARGBROW_AVX2 +ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) +#endif +#ifdef HAS_I444TOABGRROW_AVX2 +ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15) +#endif +#ifdef HAS_I411TOARGBROW_AVX2 +ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) +#endif +#ifdef HAS_I422TOARGB4444ROW_AVX2 +ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7) +#endif 
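For reference, the ANY31C wrapper defined just above is what removes the width restriction from the SIMD kernels instantiated here: n = width & ~MASK pixels go straight to the kernel, and the r = width & MASK leftovers are bounced through a zeroed temp block so the kernel still sees a full MASK + 1 pixels without reading or writing past the caller's planes. A standalone sketch of that split follows; the width value is illustrative and not taken from the patch.

/* Standalone sketch, not part of the patch: the width split performed by
 * the ANY31C wrapper. MASK is 7 for the 8-pixel SSSE3/NEON kernels and 15
 * for the 16-pixel AVX2 kernels instantiated in this section. */
#include <stdio.h>

int main(void) {
  int width = 100;        /* illustrative width */
  int mask = 15;          /* an AVX2 kernel handles 16 pixels per call */
  int n = width & ~mask;  /* 96 pixels are fed to the SIMD kernel directly */
  int r = width & mask;   /* 4 leftover pixels */
  printf("direct SIMD pixels: %d, remainder pixels: %d\n", n, r);
  /* For the remainder, ANY31C copies the r luma samples and SS(r, UVSHIFT)
   * chroma samples into the zeroed temp buffer, runs the kernel once more on
   * a full mask + 1 block, and copies only the r output pixels back. */
  return 0;
}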
+#ifdef HAS_I422TOARGB1555ROW_AVX2 +ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) +#endif +#ifdef HAS_I422TORGB565ROW_AVX2 +ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) +#endif +#ifdef HAS_I422TOARGBROW_NEON +ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) +ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) +ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) +ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7) +ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7) +ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) +ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) +ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7) +ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) +ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) +ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) +#endif +#undef ANY31C // Any 2 planes to 1. #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ @@ -164,32 +160,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } -// Biplanar to RGB. -#ifdef HAS_NV12TOARGBROW_SSSE3 -ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) -ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV12TOARGBROW_AVX2 -ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) -ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) -#endif -#ifdef HAS_NV12TOARGBROW_NEON -ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) -ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) -#endif -#ifdef HAS_NV12TORGB565ROW_SSSE3 -ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) -ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) -#endif -#ifdef HAS_NV12TORGB565ROW_AVX2 -ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) -ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15) -#endif -#ifdef HAS_NV12TORGB565ROW_NEON -ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) -ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7) -#endif - // Merge functions. #ifdef HAS_MERGEUVROW_SSE2 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) @@ -249,6 +219,46 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) #endif #undef ANY21 +// Any 2 planes to 1 with yuvconstants +#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ + void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ + uint8* dst_ptr, struct YuvConstants* yuvconstants, \ + int width) { \ + SIMD_ALIGNED(uint8 temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n * SBPP, r * SBPP); \ + memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ + SS(r, UVSHIFT) * SBPP2); \ + ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } + +// Biplanar to RGB. 
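A note on the biplanar wrappers that follow (illustration only, not patch content): NV12 carries a single interleaved UV plane, which is what the SBPP2 = 2 argument of the ANY21C instantiations accounts for, and the NV21 variants disappear because the NV21 entry points, such as NV21ToRGB565 above, now reuse the NV12 kernels with kYvuConstants, whose swapped coefficients read the same byte pair as (V, U). A minimal sketch of the chroma addressing involved; the helper name is hypothetical.

/* Hypothetical helper, for illustration only: how a row kernel locates the
 * interleaved chroma pair for pixel column x of an NV12/NV21 plane. */
void SampleBiplanarUV(const unsigned char* uv_buf, int x,
                      unsigned char* first, unsigned char* second) {
  const unsigned char* pair = uv_buf + (x >> 1) * 2;  /* 2 bytes per 2 columns */
  *first = pair[0];   /* U for NV12, V for NV21 */
  *second = pair[1];  /* V for NV12, U for NV21 */
  /* With kYuvConstants the kernel treats (first, second) as (U, V); the NV21
   * paths pass kYvuConstants instead, so the identical kernel applies the V
   * coefficients to pair[0] and the U coefficients to pair[1]. */
}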
+#ifdef HAS_NV12TOARGBROW_SSSE3 +ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TOARGBROW_AVX2 +ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) +#endif +#ifdef HAS_NV12TOARGBROW_NEON +ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TORGB565ROW_SSSE3 +ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) +#endif +#ifdef HAS_NV12TORGB565ROW_AVX2 +ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) +#endif +#ifdef HAS_NV12TORGB565ROW_NEON +ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) +#endif +#undef ANY21C + // Any 1 to 1. #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ @@ -297,9 +307,7 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) #if defined(HAS_I400TOARGBROW_AVX2) ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) #endif -#if defined(HAS_YUY2TOARGBROW_SSSE3) -ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) -ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) +#if defined(HAS_RGB24TOARGBROW_SSSE3) ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) @@ -315,10 +323,6 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) #if defined(HAS_ARGB4444TOARGBROW_AVX2) ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) #endif -#if defined(HAS_YUY2TOARGBROW_AVX2) -ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) -ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) -#endif #if defined(HAS_ARGBTORGB24ROW_NEON) ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) @@ -327,8 +331,6 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) -ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) -ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) #endif #ifdef HAS_ARGBTOYROW_AVX2 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) @@ -426,6 +428,35 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif #undef ANY11 +// Any 1 to 1 with yuvconstants +#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ + struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8 temp[128 * 2]); \ + memset(temp, 0, 128); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } +#if defined(HAS_YUY2TOARGBROW_SSSE3) +ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) +ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) +#endif +#if defined(HAS_YUY2TOARGBROW_AVX2) +ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) +ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) +#endif +#if 
defined(HAS_YUY2TOARGBROW_NEON) +ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) +ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) +#endif +#undef ANY11C + // Any 1 to 1 blended. #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ diff --git a/source/row_common.cc b/source/row_common.cc index 0a4520f71..341380332 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1014,6 +1014,22 @@ void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) +#if defined(__arm__) || defined(__aarch64__) +YuvConstants SIMD_ALIGNED(kYuvConstants) = { + { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, + { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BB, BG, BR, 0, 0, 0, 0, 0 }, + { 0x0101 * YG, 0, 0, 0 } +}; + +YuvConstants SIMD_ALIGNED(kYvuConstants) = { + { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 }, + { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BB, BG, BR, 0, 0, 0, 0, 0 }, + { 0x0101 * YG, 0, 0, 0 } +}; + +#else // BT601 constants for YUV to RGB. YuvConstants SIMD_ALIGNED(kYuvConstants) = { { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, @@ -1041,22 +1057,7 @@ YuvConstants SIMD_ALIGNED(kYvuConstants) = { { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } }; - -YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = { - { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; - -// C reference code that mimics the YUV assembly. -static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; - *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6); - *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6); - *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6); -} +#endif // C reference code that mimics the YUV assembly. static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { @@ -1065,15 +1066,50 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { *g = Clamp((int32)(y1 + YGB) >> 6); *r = Clamp((int32)(y1 + YGB) >> 6); } -#undef YG + +#undef BB +#undef BG +#undef BR #undef YGB #undef UB #undef UG #undef VG #undef VR -#undef BB -#undef BG -#undef BR +#undef YG + +// C reference code that mimics the YUV assembly. 
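The C reference below now reads its coefficients out of the YuvConstants argument instead of file-scope defines; the arithmetic itself is unchanged. As a reading aid, here is a minimal standalone sketch of the same fixed-point pattern for one channel. The coefficient parameters mirror the values YuvPixel pulls from the struct; no numeric constants are assumed here.

/* Standalone sketch (not from the patch) of the fixed-point pattern used by
 * YuvPixel below, shown for the blue channel with VB treated as 0, which is
 * its usual value in the Intel tables. */
unsigned char YuvToBlueSketch(unsigned char y, unsigned char u,
                              int YG, int UB, int BB) {
  /* y * 0x0101 replicates the 8-bit luma into 16 bits, so the product with
   * the roughly 15-bit gain YG still fits in 32 bits; >>16 leaves the luma
   * term scaled by 64. */
  unsigned int y1 = (unsigned int)(y * 0x0101 * YG) >> 16;
  int blue = (-(u * UB) + (int)y1 + BB) >> 6;  /* remove the x64 scale */
  if (blue < 0) blue = 0;    /* Clamp() in the real code */
  if (blue > 255) blue = 255;
  return (unsigned char)blue;
}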
+static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, + uint8* b, uint8* g, uint8* r, + struct YuvConstants* yuvconstants) { +#if defined(__arm__) || defined(__aarch64__) + + int UB = -yuvconstants->kUVToRB[0]; + int VB = 0; + int UG = yuvconstants->kUVToG[0]; + int VG = yuvconstants->kUVToG[4]; + int UR = 0; + int VR = -yuvconstants->kUVToRB[4]; + int BB = yuvconstants->kUVBiasBGR[0]; + int BG = yuvconstants->kUVBiasBGR[1]; + int BR = yuvconstants->kUVBiasBGR[2]; + int YG = yuvconstants->kYToRgb[0]; +#else + int UB = yuvconstants->kUVToB[0]; + int VB = yuvconstants->kUVToB[1]; // usually 0 + int UG = yuvconstants->kUVToG[0]; + int VG = yuvconstants->kUVToG[1]; + int UR = yuvconstants->kUVToR[0]; // usually 0 + int VR = yuvconstants->kUVToR[1]; + int BB = yuvconstants->kUVBiasB[0]; + int BG = yuvconstants->kUVBiasG[0]; + int BR = yuvconstants->kUVBiasR[0]; + int YG = yuvconstants->kYToRgb[0]; +#endif + uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; + *b = Clamp((int32)(-(u * UB + v * VB) + y1 + BB) >> 6); + *g = Clamp((int32)(-(u * UG + v * VG) + y1 + BG) >> 6); + *r = Clamp((int32)(-(u * UR + v * VR) + y1 + BR) >> 6); +} // JPEG YUV to RGB reference // * R = Y - V * -1.40200 @@ -1095,6 +1131,15 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { #define BGJ (UGJ * 128 + VGJ * 128 + YGBJ) #define BRJ (VRJ * 128 + YGBJ) +#if defined(__arm__) || defined(__aarch64__) +// JPEG constants for YUV to RGB. +YuvConstants SIMD_ALIGNED(kYuvJConstants) = { + { -UBJ, -UBJ, -UBJ, -UBJ, -VRJ, -VRJ, -VRJ, -VRJ, 0, 0, 0, 0, 0, 0, 0, 0 }, + { UGJ, UGJ, UGJ, UGJ, VGJ, VGJ, VGJ, VGJ, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BBJ, BGJ, BRJ, 0, 0, 0, 0, 0 }, + { 0x0101 * YGJ, 0, 0, 0 } +}; +#else // JPEG constants for YUV to RGB. YuvConstants SIMD_ALIGNED(kYuvJConstants) = { { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, @@ -1114,23 +1159,7 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = { { YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ } }; - -// JPEG constants for YUV to RGB. -YuvConstantsNEON SIMD_ALIGNED(kYuvJConstantsNEON) = { - { -UBJ, -UBJ, -UBJ, -UBJ, -VRJ, -VRJ, -VRJ, -VRJ, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UGJ, UGJ, UGJ, UGJ, VGJ, VGJ, VGJ, VGJ, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BBJ, BGJ, BRJ, 0, 0, 0, 0, 0 }, - { 0x0101 * YGJ, 0, 0, 0 } -}; - -// C reference code that mimics the YUV assembly. -static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16; - *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6); - *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6); - *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6); -} +#endif #undef YGJ #undef YGBJ @@ -1162,6 +1191,15 @@ static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v, #define BGH (UGH * 128 + VGH * 128 + YGBH) #define BRH (VRH * 128 + YGBH) +#if defined(__arm__) || defined(__aarch64__) +// BT.709 constants for YUV to RGB. +YuvConstants SIMD_ALIGNED(kYuvHConstants) = { + { -UBH, -UBH, -UBH, -UBH, -VRH, -VRH, -VRH, -VRH, 0, 0, 0, 0, 0, 0, 0, 0 }, + { UGH, UGH, UGH, UGH, VGH, VGH, VGH, VGH, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BBH, BGH, BRH, 0, 0, 0, 0, 0 }, + { 0x0101 * YGH, 0, 0, 0 } +}; +#else // BT.709 constants for YUV to RGB. 
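One more orientation note, illustrative rather than part of the patch: with every colorspace expressed as a YuvConstants table, the row kernels themselves are colorspace-agnostic and the choice lives entirely in which table the high-level wrapper passes, for example &kYuvConstants for the I420/I422 paths, &kYuvJConstants for J422, &kYuvHConstants for H422, and &kYvuConstants where the chroma bytes arrive in VU order, as in NV21ToRGB565 above. A hypothetical helper making that mapping explicit:

/* Hypothetical helper, for illustration only; the real wrappers simply pass
 * the address of the table they need. The tables are declared in row.h. */
struct YuvConstants;
extern struct YuvConstants kYuvConstants;   /* BT.601 */
extern struct YuvConstants kYuvJConstants;  /* JPEG, full range */
extern struct YuvConstants kYuvHConstants;  /* BT.709 */
extern struct YuvConstants kYvuConstants;   /* BT.601 with U/V coefficients swapped */

enum SketchColorSpace { kSketchBT601, kSketchJPEG, kSketchBT709, kSketchVUOrder };

struct YuvConstants* PickYuvConstants(enum SketchColorSpace cs) {
  switch (cs) {
    case kSketchJPEG:    return &kYuvJConstants;
    case kSketchBT709:   return &kYuvHConstants;
    case kSketchVUOrder: return &kYvuConstants;  /* e.g. the NV21 path */
    default:             return &kYuvConstants;
  }
}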
YuvConstants SIMD_ALIGNED(kYuvHConstants) = { { UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, @@ -1181,23 +1219,7 @@ YuvConstants SIMD_ALIGNED(kYuvHConstants) = { { YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH } }; - -// BT.709 constants for YUV to RGB. -YuvConstantsNEON SIMD_ALIGNED(kYuvHConstantsNEON) = { - { -UBH, -UBH, -UBH, -UBH, -VRH, -VRH, -VRH, -VRH, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UGH, UGH, UGH, UGH, VGH, VGH, VGH, VGH, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BBH, BGH, BRH, 0, 0, 0, 0, 0 }, - { 0x0101 * YGH, 0, 0, 0 } -}; - -// C reference code that mimics the YUV assembly. -static __inline void YuvHPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YGH) >> 16; - *b = Clamp((int32)(-(u * UBH) + y1 + BBH) >> 6); - *g = Clamp((int32)(-(v * VGH + u * UGH) + y1 + BGH) >> 6); - *r = Clamp((int32)(-(v * VRH) + y1 + BRH) >> 6); -} +#endif #undef YGH #undef YGBH @@ -1217,14 +1239,17 @@ void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8 u = (src_u[0] + src_u[1] + 1) >> 1; uint8 v = (src_v[0] + src_v[1] + 1) >> 1; - YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, + yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, + yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 2; @@ -1233,7 +1258,7 @@ void I444ToARGBRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } @@ -1241,14 +1266,17 @@ void I444ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8 u = (src_u[0] + src_u[1] + 1) >> 1; uint8 v = (src_v[0] + src_v[1] + 1) >> 1; - YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, + yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); + YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, + yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 2; @@ -1257,7 +1285,7 @@ void I444ToABGRRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); } } #else @@ -1265,11 +1293,12 @@ void I444ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width; ++x) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; src_y += 1; src_u += 1; @@ -1282,11 +1311,12 @@ void I444ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width; ++x) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); rgb_buf[3] = 255; src_y += 1; src_u += 1; @@ -1301,14 +1331,15 @@ void 
I422ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; @@ -1317,124 +1348,23 @@ void I422ToARGBRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void J422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvJPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void J422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - YuvJPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvJPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - } -} - -// TODO(fbarchard): replace with common matrix function. -void H422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvHPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void H422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - YuvHPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. 
- } - if (width & 1) { - YuvHPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - } -} - - void I422ToRGB24Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); + rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); src_y += 2; src_u += 1; src_v += 1; @@ -1442,7 +1372,7 @@ void I422ToRGB24Row_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } @@ -1450,13 +1380,14 @@ void I422ToRAWRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); + rgb_buf + 5, rgb_buf + 4, rgb_buf + 3, yuvconstants); src_y += 2; src_u += 1; src_v += 1; @@ -1464,7 +1395,7 @@ void I422ToRAWRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); } } @@ -1472,6 +1403,7 @@ void I422ToARGB4444Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1481,8 +1413,8 @@ void I422ToARGB4444Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -1497,7 +1429,7 @@ void I422ToARGB4444Row_C(const uint8* src_y, dst_argb4444 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -1510,6 +1442,7 @@ void I422ToARGB1555Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1519,8 +1452,8 @@ void I422ToARGB1555Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -1535,7 +1468,7 @@ void I422ToARGB1555Row_C(const uint8* src_y, dst_argb1555 += 4; // Advance 2 pixels. 
} if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -1548,6 +1481,7 @@ void I422ToRGB565Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1557,8 +1491,8 @@ void I422ToRGB565Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1573,7 +1507,7 @@ void I422ToRGB565Row_C(const uint8* src_y, dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1585,20 +1519,21 @@ void I411ToARGBRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 3; x += 4) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; YuvPixel(src_y[2], src_u[0], src_v[0], - rgb_buf + 8, rgb_buf + 9, rgb_buf + 10); + rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants); rgb_buf[11] = 255; YuvPixel(src_y[3], src_u[0], src_v[0], - rgb_buf + 12, rgb_buf + 13, rgb_buf + 14); + rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants); rgb_buf[15] = 255; src_y += 4; src_u += 1; @@ -1607,17 +1542,17 @@ void I411ToARGBRow_C(const uint8* src_y, } if (width & 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; rgb_buf += 8; // Advance 2 pixels. 
} if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } @@ -1625,14 +1560,15 @@ void I411ToARGBRow_C(const uint8* src_y, void NV12ToARGBRow_C(const uint8* src_y, const uint8* src_uv, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_uv[0], src_uv[1], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_uv += 2; @@ -1640,32 +1576,7 @@ void NV12ToARGBRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_uv[0], src_uv[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - - YuvPixel(src_y[1], src_vu[1], src_vu[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - - src_y += 2; - src_vu += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } @@ -1673,6 +1584,7 @@ void NV21ToARGBRow_C(const uint8* src_y, void NV12ToRGB565Row_C(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { uint8 b0; uint8 g0; @@ -1682,8 +1594,8 @@ void NV12ToRGB565Row_C(const uint8* src_y, uint8 r1; int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0); - YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1); + YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); + YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1697,42 +1609,7 @@ void NV12ToRGB565Row_C(const uint8* src_y, dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); - } -} - -void NV21ToRGB565Row_C(const uint8* src_y, - const uint8* vsrc_u, - uint8* dst_rgb565, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); - YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - b1 = b1 >> 3; - g1 = g1 >> 2; - r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); - src_y += 2; - vsrc_u += 2; - dst_rgb565 += 4; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); + YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -1742,42 +1619,44 @@ void NV21ToRGB565Row_C(const uint8* src_y, void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_yuy2 += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_uyvy += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } @@ -1786,14 +1665,15 @@ void I422ToBGRARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); + rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants); rgb_buf[0] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 7, rgb_buf + 6, rgb_buf + 5); + rgb_buf + 7, rgb_buf + 6, rgb_buf + 5, yuvconstants); rgb_buf[4] = 255; src_y += 2; src_u += 1; @@ -1802,7 +1682,7 @@ void I422ToBGRARow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); + rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants); rgb_buf[0] = 255; } } @@ -1811,14 +1691,15 @@ void I422ToABGRRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); + rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; @@ -1827,7 +1708,7 @@ void I422ToABGRRow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants); rgb_buf[3] = 255; } } @@ -1836,14 +1717,15 @@ void I422ToRGBARow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, 
rgb_buf + 2, rgb_buf + 3); + rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 6, rgb_buf + 7); + rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants); rgb_buf[4] = 255; src_y += 2; src_u += 1; @@ -1852,7 +1734,7 @@ void I422ToRGBARow_C(const uint8* src_y, } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); + rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; } } @@ -2401,374 +2283,10 @@ void I422ToUYVYRow_C(const uint8* src_y, } } -#define ANYYUV(NAMEANY, ANY_SIMD, YUVCONSTANTS) \ - void NAMEANY(const uint8* y_buf, \ - const uint8* u_buf, \ - const uint8* v_buf, \ - uint8* dst_argb, \ - int width) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_argb, &YUVCONSTANTS, width); \ - } - -#ifdef HAS_I422TOARGBMATRIXROW_NEON -ANYYUV(I422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvConstantsNEON) -ANYYUV(J422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvJConstantsNEON) -ANYYUV(H422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvHConstantsNEON) -#endif -#ifdef HAS_I422TOABGRMATRIXROW_NEON -ANYYUV(I422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvConstantsNEON) -ANYYUV(J422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvJConstantsNEON) -ANYYUV(H422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvHConstantsNEON) -#endif -#ifdef HAS_I422TOARGBMATRIXROW_SSSE3 -ANYYUV(I422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvConstants) -ANYYUV(J422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvJConstants) -ANYYUV(H422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvHConstants) -#endif -#ifdef HAS_I422TOARGBMATRIXROW_AVX2 -ANYYUV(I422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvConstants) -ANYYUV(J422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvJConstants) -ANYYUV(H422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvHConstants) -#endif -#ifdef HAS_I422TOABGRMATRIXROW_SSSE3 -ANYYUV(I422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvConstants) -ANYYUV(J422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvJConstants) -ANYYUV(H422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvHConstants) -#endif -#ifdef HAS_I422TOABGRMATRIXROW_AVX2 -ANYYUV(I422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvConstants) -ANYYUV(J422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvJConstants) -ANYYUV(H422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvHConstants) -#endif -// TODO(fbarchard): Neon, J444, H444 versions. -#ifdef HAS_I444TOARGBMATRIXROW_SSSE3 -ANYYUV(I444ToARGBRow_SSSE3, I444ToARGBMatrixRow_SSSE3, kYuvConstants) -#endif -#ifdef HAS_I444TOARGBMATRIXROW_AVX2 -ANYYUV(I444ToARGBRow_AVX2, I444ToARGBMatrixRow_AVX2, kYuvConstants) -#endif -#ifdef HAS_I444TOABGRMATRIXROW_SSSE3 -ANYYUV(I444ToABGRRow_SSSE3, I444ToABGRMatrixRow_SSSE3, kYuvConstants) -#endif -#ifdef HAS_I444TOABGRMATRIXROW_AVX2 -ANYYUV(I444ToABGRRow_AVX2, I444ToABGRMatrixRow_AVX2, kYuvConstants) -#endif - -// Maximum temporary width for wrappers to process at a time, in pixels. -#define MAXTWIDTH 2048 - -#if !(defined(_MSC_VER) && defined(_M_IX86)) && \ - defined(HAS_I422TORGB565ROW_SSSE3) -// row_win.cc has asm version, but GCC uses 2 step wrapper. -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width) { - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); - ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB1555ROW_SSSE3) -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); - ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb1555 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB4444ROW_SSSE3) -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); - ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb4444 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV12TORGB565ROW_SSSE3) -void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - NV12ToARGBRow_SSSE3(src_y, src_uv, row, twidth); - ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); - src_y += twidth; - src_uv += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV21TORGB565ROW_SSSE3) -void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - NV21ToARGBRow_SSSE3(src_y, src_vu, row, twidth); - ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); - src_y += twidth; - src_vu += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_YUY2TOARGBROW_SSSE3) -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth); - YUY2ToYRow_SSE2(src_yuy2, row_y, twidth); - I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth); - src_yuy2 += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif - -#if defined(HAS_UYVYTOARGBROW_SSSE3) -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth); - UYVYToYRow_SSE2(src_uyvy, row_y, twidth); - I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth); - src_uyvy += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif // !defined(LIBYUV_DISABLE_X86) - -#if defined(HAS_I422TORGB565ROW_AVX2) -void I422ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width) { - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB1555ROW_AVX2) -void I422ToARGB1555Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb1555 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGB4444ROW_AVX2) -void I422ToARGB4444Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_argb4444 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TORGB24ROW_AVX2) -void I422ToRGB24Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - // TODO(fbarchard): ARGBToRGB24Row_AVX2 - ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb24 += twidth * 3; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TORAWROW_AVX2) -void I422ToRAWRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); - // TODO(fbarchard): ARGBToRAWRow_AVX2 - ARGBToRAWRow_SSSE3(row, dst_raw, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_raw += twidth * 3; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV12TORGB565ROW_AVX2) -void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - NV12ToARGBRow_AVX2(src_y, src_uv, row, twidth); - ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); - src_y += twidth; - src_uv += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_NV21TORGB565ROW_AVX2) -void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu, - uint8* dst_rgb565, int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - NV21ToARGBRow_AVX2(src_y, src_vu, row, twidth); - ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); - src_y += twidth; - src_vu += twidth; - dst_rgb565 += twidth * 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_YUY2TOARGBROW_AVX2) -void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth); - YUY2ToYRow_AVX2(src_yuy2, row_y, twidth); - I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth); - src_yuy2 += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif - -#if defined(HAS_UYVYTOARGBROW_AVX2) -void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) { - // Row buffers for intermediate YUV pixels. - SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); - SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); - SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth); - UYVYToYRow_AVX2(src_uyvy, row_y, twidth); - I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth); - src_uyvy += twidth * 2; - dst_argb += twidth * 4; - width -= twidth; - } -} -#endif // !defined(LIBYUV_DISABLE_X86) void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, + uint8* dst_argb, + const float* poly, int width) { int i; for (i = 0; i < width; ++i) { @@ -2868,6 +2386,311 @@ void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { } } +// Maximum temporary width for wrappers to process at a time, in pixels. +#define MAXTWIDTH 2048 + +#if !(defined(_MSC_VER) && defined(_M_IX86)) && \ + defined(HAS_I422TORGB565ROW_SSSE3) +// row_win.cc has asm version, but GCC uses 2 step wrapper. +void I422ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB1555ROW_SSSE3) +void I422ToARGB1555Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb1555, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb1555 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB4444ROW_SSSE3) +void I422ToARGB4444Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb4444 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV12TORGB565ROW_SSSE3) +void NV12ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); + ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); + src_y += twidth; + src_uv += twidth; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_YUY2TOARGBROW_SSSE3) +void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth); + YUY2ToYRow_SSE2(src_yuy2, row_y, twidth); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_yuy2 += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif + +#if defined(HAS_UYVYTOARGBROW_SSSE3) +void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth); + UYVYToYRow_SSE2(src_uyvy, row_y, twidth); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_uyvy += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif // !defined(LIBYUV_DISABLE_X86) + +#if defined(HAS_I422TORGB565ROW_AVX2) +void I422ToRGB565Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB1555ROW_AVX2) +void I422ToARGB1555Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb1555, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb1555 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TOARGB4444ROW_AVX2) +void I422ToARGB4444Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_argb4444 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TORGB24ROW_AVX2) +void I422ToRGB24Row_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + // TODO(fbarchard): ARGBToRGB24Row_AVX2 + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TORAWROW_AVX2) +void I422ToRAWRow_AVX2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); + // TODO(fbarchard): ARGBToRAWRow_AVX2 + ARGBToRAWRow_SSSE3(row, dst_raw, twidth); + src_y += twidth; + src_u += twidth / 2; + src_v += twidth / 2; + dst_raw += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV12TORGB565ROW_AVX2) +void NV12ToRGB565Row_AVX2(const uint8* src_y, + const uint8* src_uv, + uint8* dst_rgb565, + struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); + ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); + src_y += twidth; + src_uv += twidth; + dst_rgb565 += twidth * 2; + width -= twidth; + } +} +#endif + +#if defined(HAS_YUY2TOARGBROW_AVX2) +void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth); + YUY2ToYRow_AVX2(src_yuy2, row_y, twidth); + I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_yuy2 += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif + +#if defined(HAS_UYVYTOARGBROW_AVX2) +void UYVYToARGBRow_AVX2(const uint8* src_uyvy, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { + // Row buffers for intermediate YUV pixels. + SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); + SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); + SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth); + UYVYToYRow_AVX2(src_uyvy, row_y, twidth); + I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, yuvconstants, twidth); + src_uyvy += twidth * 2; + dst_argb += twidth * 4; + width -= twidth; + } +} +#endif // !defined(LIBYUV_DISABLE_X86) + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 949750321..af5ca2b52 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1350,23 +1350,23 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "punpcklwd %%xmm0,%%xmm0 \n" // Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB(YuvConstants) \ +#define YUVTORGB(yuvconstants) \ "movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm3 \n" \ - "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \ - "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \ + "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ + "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ "psubw %%xmm1,%%xmm0 \n" \ - "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \ - "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \ + "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ + "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ "psubw %%xmm2,%%xmm1 \n" \ - "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \ - "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \ + "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ + "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ "psubw %%xmm3,%%xmm2 \n" \ "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ "punpcklbw %%xmm3,%%xmm3 \n" \ - "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \ + "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \ "paddsw %%xmm3,%%xmm0 \n" \ "paddsw %%xmm3,%%xmm1 \n" \ "paddsw %%xmm3,%%xmm2 \n" \ @@ -1423,19 +1423,19 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" -void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - struct 
YuvConstants* YuvConstants, - int width) { +void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV444 - YUVTORGB(YuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1444,25 +1444,25 @@ void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV444 - YUVTORGB(YuvConstants) + YUVTORGB(yuvconstants) STOREABGR "sub $0x8,%[width] \n" "jg 1b \n" @@ -1471,7 +1471,7 @@ void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] [width]"+rm"(width) // %[width] - : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1482,6 +1482,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" @@ -1490,7 +1491,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) "punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" @@ -1514,7 +1515,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, #else [width]"+rm"(width) // %[width] #endif - : [kYuvConstants]"r"(&kYuvConstants.kUVToB), + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) : "memory", "cc", NACL_R14 @@ -1526,6 +1527,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" @@ -1534,7 +1536,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) "punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" @@ -1558,7 +1560,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, #else [width]"+rm"(width) // %[width] #endif - : [kYuvConstants]"r"(&kYuvConstants.kUVToB), + : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) : "memory", "cc", NACL_R14 @@ -1566,19 +1568,19 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, ); } -void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, 
- struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV422 - YUVTORGB(YuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1587,7 +1589,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1597,6 +1599,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1604,7 +1607,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV411 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1613,7 +1616,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1622,13 +1625,14 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READNV12 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" @@ -1636,30 +1640,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, [uv_buf]"+r"(uv_buf), // %[uv_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - // Does not use r14. - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); -} - -void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READNV12 - YUVTORGB(kYuvConstants) - STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [uv_buf]"+r"(uv_buf), // %[uv_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] // Does not use r14. 
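// --- Editor's sketch (not part of this patch) --------------------------------
// The hunks above replace the *MatrixRow* variants with row functions that take
// a struct YuvConstants* parameter, so a single SSSE3 row can serve several
// colorspaces, and the deleted NV21ToARGBRow_SSSE3 (which baked in
// kYvuConstants) becomes the NV12 row called with the VU-swapped table. The two
// helpers below are invented purely for illustration; they assume libyuv/row.h
// is included and that width is a multiple of 8, since the SSSE3 loops emit
// 8 pixels per pass.
static void J422ToARGBLine(const uint8* y, const uint8* u, const uint8* v,
                           uint8* dst_argb, int width) {
  // Full-range JPEG 4:2:2: same code path as I422, different coefficients.
  I422ToARGBRow_SSSE3(y, u, v, dst_argb, &kYuvJConstants, width);
}
static void NV21ToARGBLine(const uint8* y, const uint8* vu,
                           uint8* dst_argb, int width) {
  // NV21 is NV12 with V/U interleaving swapped; kYvuConstants swaps it back.
  NV12ToARGBRow_SSSE3(y, vu, dst_argb, &kYvuConstants, width);
}
// This is also why the ANYYUV(...) wrapper macro and its instantiations are
// deleted earlier in this change: binding a constants table to a function name
// is no longer needed once the table is an argument.
// ------------------------------------------------------------------------------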
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1669,6 +1650,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1676,7 +1658,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREBGRA "sub $0x8,%[width] \n" "jg 1b \n" @@ -1685,25 +1667,25 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STOREABGR "sub $0x8,%[width] \n" "jg 1b \n" @@ -1712,7 +1694,7 @@ void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1722,6 +1704,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1729,7 +1712,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, LABELALIGN "1: \n" READYUV422 - YUVTORGB(kYuvConstants) + YUVTORGB(yuvconstants) STORERGBA "sub $0x8,%[width] \n" "jg 1b \n" @@ -1738,7 +1721,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1788,6 +1771,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1795,7 +1779,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into BGRA "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB @@ -1816,29 +1800,29 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } #endif // HAS_I422TOBGRAROW_AVX2 -#if defined(HAS_I422TOARGBMATRIXROW_AVX2) +#if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 
16 Y producing 16 ARGB (64 bytes). -void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into ARGB "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG @@ -1859,29 +1843,29 @@ void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); } -#endif // HAS_I422TOARGBMATRIXROW_AVX2 +#endif // HAS_I422TOARGBROW_AVX2 #if defined(HAS_I422TOABGRROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). -void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - struct YuvConstants* YuvConstants, - int width) { +void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into ABGR "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG @@ -1901,7 +1885,7 @@ void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); @@ -1915,6 +1899,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1922,7 +1907,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, LABELALIGN "1: \n" READYUV422_AVX2 - YUVTORGB_AVX2(kYuvConstants) + YUVTORGB_AVX2(yuvconstants) // Step 3: Weave into RGBA "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" @@ -1942,7 +1927,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", NACL_R14 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" ); diff --git a/source/row_mips.cc b/source/row_mips.cc index 1183c7183..0720110b6 100644 --- a/source/row_mips.cc +++ b/source/row_mips.cc @@ -593,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, // t8 = | 0 | G1 | 0 | g1 | // t2 = | 0 | R0 | 0 | r0 | // t1 = | 0 | R1 | 0 | r1 | -#define I422ToTransientMipsRGB \ +#define YUVTORGB \ "lw $t0, 0(%[y_buf]) \n" \ "lhu $t1, 0(%[u_buf]) \n" \ "lhu $t2, 0(%[v_buf]) \n" \ @@ -652,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, "addu.ph $t2, $t2, $s5 \n" \ "addu.ph 
$t1, $t1, $s5 \n" +// TODO(fbarchard): accept yuv conversion constants. void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { __asm__ __volatile__ ( ".set push \n" @@ -671,7 +673,7 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff| "1: \n" - I422ToTransientMipsRGB + YUVTORGB // Arranging into argb format "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1| "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0| @@ -717,6 +719,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { __asm__ __volatile__ ( ".set push \n" @@ -732,7 +735,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, "ori $s6, 0xff00 \n" // |ff|00|ff|00| "1: \n" - I422ToTransientMipsRGB + YUVTORGB // Arranging into abgr format "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1| "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0| @@ -778,6 +781,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, + struct YuvConstants* yuvconstants, int width) { __asm__ __volatile__ ( ".set push \n" @@ -793,7 +797,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, "ori $s6, 0xff \n" // |00|ff|00|ff| "1: \n" - I422ToTransientMipsRGB + YUVTORGB // Arranging into bgra format "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1| "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0| diff --git a/source/row_neon.cc b/source/row_neon.cc index f7e6ba1dc..9052ed043 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -93,7 +93,7 @@ extern "C" { "vuzp.u8 d2, d3 \n" \ "vtrn.u32 d2, d3 \n" -#define YUV422TORGB_SETUP_REG \ +#define YUVTORGB_SETUP \ MEMACCESS([kUVToRB]) \ "vld1.8 {d24}, [%[kUVToRB]] \n" \ MEMACCESS([kUVToG]) \ @@ -107,7 +107,7 @@ extern "C" { MEMACCESS([kYToRgb]) \ "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n" -#define YUV422TORGB \ +#define YUVTORGB \ "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\ "vmull.u8 q9, d2, d25 \n" /* u/v G component */\ "vmovl.u8 q0, d0 \n" /* Y */\ @@ -138,12 +138,13 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV444 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(3) @@ -154,26 +155,26 @@ void I444ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } -void I422ToARGBMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - struct YuvConstantsNEON* YuvConstants, - int width) { +void I422ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(3) 
@@ -184,10 +185,10 @@ void I422ToARGBMatrixRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5 - [kUVToG]"r"(&YuvConstants->kUVToG), // %6 - [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR), - [kYToRgb]"r"(&YuvConstants->kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -197,12 +198,13 @@ void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV411 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(3) @@ -213,10 +215,10 @@ void I411ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -226,12 +228,13 @@ void I422ToBGRARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vswp.u8 d20, d22 \n" "vmov.u8 d19, #255 \n" @@ -243,26 +246,26 @@ void I422ToBGRARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_bgra), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } -void I422ToABGRMatrixRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - struct YuvConstantsNEON* YuvConstants, - int width) { +void I422ToABGRRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, + struct YuvConstants* yuvconstants, + int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vswp.u8 d20, d22 \n" "vmov.u8 d23, #255 \n" @@ -274,10 +277,10 @@ void I422ToABGRMatrixRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_abgr), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5 - [kUVToG]"r"(&YuvConstants->kUVToG), // %6 - [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR), - [kYToRgb]"r"(&YuvConstants->kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -287,12 +290,13 @@ void I422ToRGBARow_NEON(const uint8* 
src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgba, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d19, #255 \n" MEMACCESS(3) @@ -303,10 +307,10 @@ void I422ToRGBARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgba), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -316,12 +320,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb24, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" MEMACCESS(3) "vst3.8 {d20, d21, d22}, [%3]! \n" @@ -331,10 +336,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgb24), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -344,12 +349,13 @@ void I422ToRAWRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_raw, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vswp.u8 d20, d22 \n" MEMACCESS(3) @@ -360,10 +366,10 @@ void I422ToRAWRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_raw), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -385,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" ARGBTORGB565 MEMACCESS(3) @@ -401,10 +408,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgb565), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", 
"q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -429,12 +436,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb1555, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" ARGBTOARGB1555 @@ -446,10 +454,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb1555), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -468,13 +476,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb4444, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. "1: \n" READYUV422 - YUV422TORGB + YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" ARGBTOARGB4444 @@ -486,10 +495,10 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb4444), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -499,10 +508,10 @@ void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV400 - YUV422TORGB + YUVTORGB "subs %2, %2, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(1) @@ -511,10 +520,10 @@ void I400ToARGBRow_NEON(const uint8* src_y, : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&kYuvConstants.kUVToRB), + [kUVToG]"r"(&kYuvConstants.kUVToG), + [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -545,12 +554,13 @@ void J400ToARGBRow_NEON(const uint8* src_y, void NV12ToARGBRow_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READNV12 - YUV422TORGB + YUVTORGB "subs %3, %3, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(2) @@ -560,37 +570,10 @@ void NV12ToARGBRow_NEON(const uint8* src_y, "+r"(src_uv), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - 
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width) { - asm volatile ( - YUV422TORGB_SETUP_REG - "1: \n" - READNV21 - YUV422TORGB - "subs %3, %3, #8 \n" - "vmov.u8 d23, #255 \n" - MEMACCESS(2) - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -599,12 +582,13 @@ void NV21ToARGBRow_NEON(const uint8* src_y, void NV12ToRGB565Row_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READNV12 - YUV422TORGB + YUVTORGB "subs %3, %3, #8 \n" ARGBTORGB565 MEMACCESS(2) @@ -614,37 +598,10 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, "+r"(src_uv), // %1 "+r"(dst_rgb565), // %2 "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { - asm volatile ( - YUV422TORGB_SETUP_REG - "1: \n" - READNV21 - YUV422TORGB - "subs %3, %3, #8 \n" - ARGBTORGB565 - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 
- "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_rgb565), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -652,12 +609,13 @@ void NV21ToRGB565Row_NEON(const uint8* src_y, void YUY2ToARGBRow_NEON(const uint8* src_yuy2, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUY2 - YUV422TORGB + YUVTORGB "subs %2, %2, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(1) @@ -666,10 +624,10 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, : "+r"(src_yuy2), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); @@ -677,12 +635,13 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, void UYVYToARGBRow_NEON(const uint8* src_uyvy, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READUYVY - YUV422TORGB + YUVTORGB "subs %2, %2, #8 \n" "vmov.u8 d23, #255 \n" MEMACCESS(1) @@ -691,10 +650,10 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, : "+r"(src_uyvy), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3 - [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4 - [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 668baef77..ae7b32cbf 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -91,7 +91,8 @@ extern "C" { "uzp2 v3.8b, v2.8b, v2.8b \n" \ "ins v1.s[1], v3.s[0] \n" -#define YUV422TORGB_SETUP_REG \ +// TODO(fbarchard): replace movi with constants from struct. 
+#define YUVTORGB_SETUP \ "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ @@ -101,7 +102,7 @@ extern "C" { "movi v29.8h, #25 \n" \ "movi v30.8h, #52 \n" -#define YUV422TORGB(vR, vG, vB) \ +#define YUVTORGB(vR, vG, vB) \ "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \ @@ -143,12 +144,13 @@ void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV444 - YUV422TORGB(v22, v21, v20) + YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" /* A */ MEMACCESS(3) @@ -159,8 +161,8 @@ void I444ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -173,12 +175,13 @@ void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB(v22, v21, v20) + YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" /* A */ MEMACCESS(3) @@ -189,8 +192,8 @@ void I422ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -202,12 +205,13 @@ void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV411 - YUV422TORGB(v22, v21, v20) + YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" /* A */ MEMACCESS(3) @@ -218,8 +222,8 @@ void I411ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -231,12 +235,13 @@ void I422ToBGRARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_bgra, + struct YuvConstants* yuvconstants, int width) { asm volatile ( - YUV422TORGB_SETUP_REG + YUVTORGB_SETUP "1: \n" READYUV422 - YUV422TORGB(v21, v22, v23) + YUVTORGB(v21, v22, v23) "subs %w4, %w4, #8 \n" "movi v20.8b, #255 \n" /* A */ MEMACCESS(3) @@ -247,8 +252,8 @@ void I422ToBGRARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_bgra), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) + : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + 
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -261,12 +266,13 @@ void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v20, v21, v22)
+ YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -277,8 +283,8 @@ void I422ToABGRRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -290,12 +296,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v23, v22, v21)
+ YUVTORGB(v23, v22, v21)
"subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -306,8 +313,8 @@ void I422ToRGBARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -319,12 +326,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@@ -334,8 +342,8 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -347,12 +355,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v20, v21, v22)
+ YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@@ -362,8 +371,8 @@ void I422ToRAWRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_raw), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -382,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@@ -398,8 +408,8 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -420,12 +430,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB1555
@@ -437,8 +448,8 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -460,13 +471,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"movi v4.16b, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB4444
@@ -478,8 +490,8 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -492,10 +504,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV400
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -504,8 +516,8 @@ void I400ToARGBRow_NEON(const uint8* src_y,
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -540,12 +552,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(2)
@@ -555,51 +568,25 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV21TOARGBROW_NEON
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB(v22, v21, v20)
- "subs %w3, %w3, #8 \n"
- "movi v23.8b, #255 \n"
- MEMACCESS(2)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TOARGBROW_NEON
-
#ifdef HAS_NV12TORGB565ROW_NEON
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@@ -609,51 +596,25 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TORGB565ROW_NEON
-#ifdef HAS_NV21TORGB565ROW_NEON
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB(v22, v21, v20)
- "subs %w3, %w3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TORGB565ROW_NEON
-
#ifdef HAS_YUY2TOARGBROW_NEON
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUY2
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -662,8 +623,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -673,13 +634,14 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
#ifdef HAS_UYVYTOARGBROW_NEON
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READUYVY
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -688,8 +650,8 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
diff --git a/source/row_win.cc b/source/row_win.cc
index f7a310520..f09d2a75b 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -83,13 +83,13 @@ extern "C" {
dst_argb += 32;
-#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+#if defined(HAS_I422TOARGBROW_SSSE3)
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@@ -102,13 +102,13 @@ void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
}
#endif
-#if defined(HAS_I422TOABGRMATRIXROW_SSSE3)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+#if defined(HAS_I422TOABGRROW_SSSE3)
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@@ -1963,16 +1963,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
__asm lea edx, [edx + 64] \
}
-#ifdef HAS_I422TOARGBMATRIXROW_AVX2
+#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
-void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2001,18 +2001,18 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
-#endif // HAS_I422TOARGBMATRIXROW_AVX2
+#endif // HAS_I422TOARGBROW_AVX2
-#ifdef HAS_I444TOARGBMATRIXROW_AVX2
+#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
-void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2040,18 +2040,18 @@ void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
-#endif // HAS_I444TOARGBMATRIXROW_AVX2
+#endif // HAS_I444TOARGBROW_AVX2
-#ifdef HAS_I444TOABGRMATRIXROW_AVX2
+#ifdef HAS_I444TOABGRROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
-void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2079,7 +2079,7 @@ void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
-#endif // HAS_I444TOABGRMATRIXROW_AVX2
+#endif // HAS_I444TOABGRROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
@@ -2089,26 +2089,30 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV411_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2124,23 +2128,27 @@ __declspec(naked)
void NV12ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebp
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebp, [esp + 8 + 16] // YuvConstants
+ mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READNV12_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop esi
vzeroupper
ret
@@ -2148,37 +2156,6 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
}
#endif // HAS_NV12TOARGBROW_AVX2
-#ifdef HAS_NV21TOARGBROW_AVX2
-// 16 pixels.
-// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)
-void NV21ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READNV12_AVX2
- YUVTORGB_AVX2(kYvuConstants)
- STOREARGB_AVX2
-
- sub ecx, 16
- jg convertloop
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_NV21TOARGBROW_AVX2
-
#ifdef HAS_I422TOBGRAROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
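The NV21 kernels deleted above were simply the NV12 read macros followed by YUVTORGB with kYvuConstants, that is, NV12 with the chroma constants swapped. Once the constants travel as an argument, the surviving NV12 kernels can cover NV21 as well. A minimal sketch of such a wrapper (hypothetical, not part of this patch; it assumes NV12ToARGBRow_AVX2 and kYvuConstants are visible through row.h):

// Hypothetical helper for illustration only: NV21 is NV12 with V/U order swapped,
// so the VU-swapped kYvuConstants table reproduces the removed NV21 path.
static void NV21ToARGBRow_ViaNV12_AVX2(const uint8* src_y,
                                       const uint8* src_vu,
                                       uint8* dst_argb,
                                       int width) {
  // width is assumed to be a multiple of 16 here (the AVX2 kernel's step).
  NV12ToARGBRow_AVX2(src_y, src_vu, dst_argb, &kYvuConstants, width);
}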
@@ -2188,26 +2165,30 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREBGRA_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2224,26 +2205,30 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STORERGBA_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2256,12 +2241,12 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
-void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2481,12 +2466,12 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
-void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2518,12 +2503,12 @@ void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
__declspec(naked)
-void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2559,27 +2544,31 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb24
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgb24
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGB24
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2593,27 +2582,31 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // raw
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // raw
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERAW
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2627,15 +2620,18 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb565_buf,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb565
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgb565
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
psrld xmm5, 27
@@ -2647,12 +2643,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGB565
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2662,12 +2659,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2704,30 +2701,32 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
- push ebx
push esi
push edi
+ push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
- READYUV411 // modifies EBX
- YUVTORGB(kYuvConstants)
+ READYUV411
+ YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
- pop ebx
ret
}
}
@@ -2738,51 +2737,27 @@ __declspec(naked)
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebp
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebp, [esp + 8 + 16] // YuvConstants
+ mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READNV12
- YUVTORGB(kYuvConstants)
- STOREARGB
-
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels.
-// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READNV12
- YUVTORGB(kYvuConstants)
+ YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
+ pop ebp
pop esi
ret
}
}
@@ -2793,25 +2768,29 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // bgra
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STOREBGRA
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2819,12 +2798,12 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
}
__declspec(naked)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2858,31 +2837,34 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // rgba
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGBA
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
}
}
-
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I400TOARGBROW_SSE2
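With the Matrix variants folded into the plain row functions above, the x86 row kernels now select their colorspace through the YuvConstants argument rather than a hard-coded table. A minimal caller sketch (hypothetical wrapper, not part of this patch; it assumes the row.h declarations, including kYuvConstants, are in scope):

// Hypothetical wrapper for illustration only: converts one I422 row to ARGB using
// the default kYuvConstants table used throughout this patch; passing a different
// YuvConstants* selects a different colorspace without changing the kernel.
static void ConvertI422RowToARGB(const uint8* y, const uint8* u, const uint8* v,
                                 uint8* dst_argb, int width) {
  // width is assumed to be a multiple of 8 here (the SSSE3 kernel's step).
  I422ToARGBRow_SSSE3(y, u, v, dst_argb, &kYuvConstants, width);
}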