From bdf7cb591452611090922e690d5104a7d8c6b1e5 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Mon, 5 Nov 2012 23:40:11 +0000 Subject: [PATCH] RGB formats converted to YUV with Neon BUG=none TEST=convert_test Review URL: https://webrtc-codereview.appspot.com/936013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@471 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/convert.h | 64 +- include/libyuv/convert_from_argb.h | 87 ++- include/libyuv/row.h | 1058 +++++++++++++++------------- include/libyuv/scale.h | 4 +- include/libyuv/version.h | 2 +- source/convert.cc | 441 +++++++----- source/convert_argb.cc | 55 +- source/convert_from.cc | 94 +-- source/convert_from_argb.cc | 586 +++++++++++++-- source/format_conversion.cc | 16 +- source/planar_functions.cc | 2 +- source/rotate.cc | 12 +- source/row_any.cc | 67 +- source/row_common.cc | 529 ++++++++------ source/row_mips.cc | 6 +- source/row_neon.cc | 228 +++++- source/row_posix.cc | 156 +++- source/row_win.cc | 156 +++- unit_test/convert_test.cc | 242 ++++--- 20 files changed, 2528 insertions(+), 1279 deletions(-) diff --git a/README.chromium b/README.chromium index 1d368327b..0b383abbe 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 470 +Version: 471 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index e07bfd199..da1a7e6e9 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -22,22 +22,9 @@ namespace libyuv { extern "C" { #endif -// Alias. -#define I420ToI420 I420Copy - -// Copy I420 to I420. +// Convert I444 to I420. LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I422 to I420. -LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, +int I444ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, @@ -45,9 +32,9 @@ int I422ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert I444 to I420. +// Convert I422 to I420. LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, +int I422ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, @@ -65,6 +52,17 @@ int I411ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); +// Copy I420 to I420. +#define I420ToI420 I420Copy +LIBYUV_API +int I420Copy(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Convert I400 (grey) to I420. LIBYUV_API int I400ToI420(const uint8* src_y, int src_stride_y, @@ -91,6 +89,22 @@ int NV21ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); +// Convert YUY2 to I420. +LIBYUV_API +int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert UYVY to I420. 
+LIBYUV_API +int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Convert M420 to I420. LIBYUV_API int M420ToI420(const uint8* src_m420, int src_stride_m420, @@ -108,22 +122,6 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert YUY2 to I420. -LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert UYVY to I420. -LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Convert V210 to I420. LIBYUV_API int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy, diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index 41cfcb7e6..0aa23f438 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -18,15 +18,27 @@ namespace libyuv { extern "C" { #endif -// Alias. -#define ARGBToARGB ARGBCopy - // Copy ARGB to ARGB. +#define ARGBToARGB ARGBCopy LIBYUV_API int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Convert ARGB To BGRA. (alias) +#define ARGBToBGRA BGRAToARGB +LIBYUV_API +int BGRAToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To ABGR. (alias) +#define ARGBToABGR ABGRToARGB +LIBYUV_API +int ABGRToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + // Convert ARGB To RGBA. LIBYUV_API int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, @@ -63,34 +75,75 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, uint8* dst_argb4444, int dst_stride_argb4444, int width, int height); +// Convert ARGB To I444. +LIBYUV_API +int ARGBToI444(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I422. +LIBYUV_API +int ARGBToI422(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I420. (also in convert.h) +LIBYUV_API +int ARGBToI420(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I411. +LIBYUV_API +int ARGBToI411(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Convert ARGB to I400. LIBYUV_API int ARGBToI400(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, int width, int height); -// ARGB little endian (bgra in memory) to I422. +// Convert ARGB To NV12. 
LIBYUV_API -int ARGBToI422(const uint8* src_frame, int src_stride_frame, +int ARGBToNV12(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, + uint8* dst_uv, int dst_stride_uv, int width, int height); -// Aliases. -#define ARGBToBGRA BGRAToARGB -#define ARGBToABGR ABGRToARGB - -// BGRA little endian (argb in memory) to ARGB. +// Convert ARGB To NV21. LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, int width, int height); -// ABGR little endian (rgba in memory) to ARGB. +// Convert ARGB To NV21. LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, + int width, int height); + +// Convert ARGB To YUY2. +LIBYUV_API +int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, + uint8* dst_yuy2, int dst_stride_yuy2, + int width, int height); + +// Convert ARGB To UYVY. +LIBYUV_API +int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, + uint8* dst_uyvy, int dst_stride_uyvy, int width, int height); #ifdef __cplusplus diff --git a/include/libyuv/row.h b/include/libyuv/row.h index cb574ece0..b7bb7dafe 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -49,6 +49,7 @@ extern "C" { #define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTORGBAROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 +#define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 #define HAS_BGRATOARGBROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 @@ -66,7 +67,7 @@ extern "C" { #define HAS_I422TORGBAROW_SSSE3 #define HAS_I444TOARGBROW_SSSE3 #define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORROWUV_SSSE3 +#define HAS_MirrorUVRow_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 @@ -94,6 +95,8 @@ extern "C" { #define HAS_I422TORGB565ROW_SSSE3 #define HAS_YUY2TOARGBROW_SSSE3 #define HAS_UYVYTOARGBROW_SSSE3 +#define HAS_RGB24TOYROW_SSSE3 +#define HAS_RAWTOYROW_SSSE3 // Effects #define HAS_ARGBAFFINEROW_SSE2 @@ -162,7 +165,7 @@ extern "C" { #define HAS_I422TORGB565ROW_NEON #define HAS_I422TORGBAROW_NEON #define HAS_MIRRORROW_NEON -#define HAS_MIRRORROWUV_NEON +#define HAS_MirrorUVRow_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV21TOARGBROW_NEON #define HAS_YUY2TOARGBROW_NEON @@ -185,11 +188,20 @@ extern "C" { #define HAS_ARGBTORGB565ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON -#define HAS_ARGBTOYROW_NEON #define HAS_MERGEUV_NEON #define HAS_YTOARGBROW_NEON #define HAS_I444TOARGBROW_NEON #define HAS_I411TOARGBROW_NEON +#define HAS_ARGBTOYROW_NEON +#define HAS_BGRATOYROW_NEON +#define HAS_ABGRTOYROW_NEON +#define HAS_RGBATOYROW_NEON +#define HAS_RGB24TOYROW_NEON +#define HAS_RAWTOYROW_NEON +#define HAS_RGB565TOARGBROW_NEON +#define HAS_ARGB1555TOARGBROW_NEON +#define HAS_ARGB4444TOARGBROW_NEON +#define HAS_RGB565TOYROW_NEON #endif // The following are available on Mips platforms @@ -198,7 +210,7 @@ extern "C" { #if defined(__mips_dsp) && (__mips_dsp_rev >= 2) #define HAS_SPLITUV_MIPS_DSPR2 #define HAS_MIRRORROW_MIPS_DSPR2 -#define HAS_MIRRORROWUV_MIPS_DSPR2 +#define HAS_MirrorUVRow_MIPS_DSPR2 #define HAS_I422TOARGBROW_MIPS_DSPR2 #define HAS_I422TOBGRAROW_MIPS_DSPR2 #define 
HAS_I422TOABGRROW_MIPS_DSPR2 @@ -237,123 +249,185 @@ typedef uint32 uvec32[4]; #define OMITFP __attribute__((optimize("omit-frame-pointer"))) #endif -void I444ToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I444ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGBARow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToRGB24Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, int width); -void I422ToRAWRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, int width); -void I422ToARGB4444Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB565Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb565, + int width); +void I422ToARGB1555Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb1555, int width); -void I422ToARGB1555Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB4444Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, int width); -void I422ToRGB565Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -void NV12ToARGBRow_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV12ToARGBRow_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV21ToARGBRow_NEON(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV12ToRGB565Row_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_rgb565, int width); -void NV21ToRGB565Row_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV21ToRGB565Row_NEON(const uint8* src_y, + const uint8* src_vu, + uint8* dst_rgb565, int width); -void YUY2ToARGBRow_NEON(const uint8* yuy2_buf, - uint8* rgb_buf, +void YUY2ToARGBRow_NEON(const uint8* 
src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_NEON(const uint8* uyvy_buf, - uint8* rgb_buf, +void UYVYToARGBRow_NEON(const uint8* src_uyvy, + uint8* dst_argb, int width); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); - -void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - +void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); +void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* 
dst_y, int pix); +void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); +void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); +void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); + +void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); + +void ARGBToUV422Row_SSSE3(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); + +void ARGBToUV444Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV422Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV411Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width); -void MirrorRowUV_MIPS_DSPR2(const uint8* src, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_MIPS_DSPR2(const 
uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); -void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width); -void MirrorRowUV_NEON(const uint8* src, uint8* dst_u, uint8* dst_v, int width); -void MirrorRowUV_C(const uint8* src, uint8* dst_u, uint8* dst_v, int width); +void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); @@ -414,43 +488,52 @@ void SetRows32_NEON(uint8* dst, uint32 v32, int width, void SetRow8_C(uint8* dst, uint32 v32, int count); void SetRows32_C(uint8* dst, uint32 v32, int width, int dst_stride, int height); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); - -void ARGBToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix); void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); - +void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix); +void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, + int pix); void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix); void ABGRToARGBRow_NEON(const uint8* src_abgr, uint8* dst_argb, int pix); void RGBAToARGBRow_NEON(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); - +void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix); +void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, + int pix); void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix); void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix); void RGBAToARGBRow_C(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* 
dst_argb, int pix); -void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix); void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); +void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, + int pix); +void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, + int pix); +void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, + int pix); +void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, + int pix); + + void ARGBToRGBARow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -480,301 +563,301 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix); -void I444ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_C(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_C(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_C(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToRGB565Row_C(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_C(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_C(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_C(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_C(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_C(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_C(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_C(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_C(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_C(const uint8* 
y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToRGB24Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb24_buf, +void I422ToRGB24Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, int width); -void I422ToRAWRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* raw_buf, +void I422ToRAWRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, int width); -void I422ToARGB4444Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb565, +void I422ToARGB4444Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, int width); -void I422ToARGB1555Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb565, +void I422ToARGB1555Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, int width); -void I422ToRGB565Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB565Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_rgb565, int width); -void YToARGBRow_C(const uint8* y_buf, - uint8* rgb_buf, +void YToARGBRow_C(const uint8* src_y, + uint8* dst_argb, int width); -void I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_SSSE3(const uint8* 
src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToARGB4444Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB4444Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGB1555Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB1555Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); // RGB24/RAW are unaligned. -void I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, int width); -void I422ToRAWRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, int width); -void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_Unaligned_SSSE3(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, - const 
uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I444ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_Any_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_Any_SSSE3(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_Any_SSSE3(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToARGB4444Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToARGB1555Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToRGB565Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, 
- uint8* rgba_buf, +void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); // RGB24/RAW are unaligned. -void I422ToRGB24Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRAWRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void YToARGBRow_SSE2(const uint8* y_buf, - uint8* argb_buf, +void YToARGBRow_SSE2(const uint8* src_y, + uint8* dst_argb, int width); -void YToARGBRow_NEON(const uint8* y_buf, - uint8* argb_buf, +void YToARGBRow_NEON(const uint8* src_y, + uint8* dst_argb, int width); // ARGB preattenuated alpha blend. -void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, +void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, +void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, +void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -789,126 +872,112 @@ void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); - -void I444ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I444ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + 
uint8* dst_argb, int width); -void I422ToABGRRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGBARow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGBARow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGB24Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRAWRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGB4444Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB4444Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGB1555Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB1555Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGB565Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB565Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV21ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV12ToRGB565Row_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV21ToRGB565Row_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void YUY2ToARGBRow_Any_NEON(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_Any_NEON(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, 
- const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); @@ -976,6 +1045,62 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); +void HalfRow_C(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); +void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); +void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); + +void ARGBToBayerRow_C(const uint8* src_argb, + uint8* dst_bayer, uint32 selector, int pix); +void ARGBToBayerRow_SSSE3(const uint8* src_argb, + uint8* dst_bayer, uint32 selector, int pix); +void ARGBToBayerRow_NEON(const uint8* src_argb, + uint8* dst_bayer, uint32 selector, int pix); + +void I422ToYUY2Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_Any_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_Any_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); + +// Effects related row functions. 
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); @@ -1026,68 +1151,13 @@ LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); -void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, +void ARGBInterpolateRow_C(uint8* dst_argb, const uint8* src_argb, + ptrdiff_t src_stride_argb, int dst_width, int source_y_fraction); -void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, +void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, + ptrdiff_t src_stride_argb, int dst_width, int source_y_fraction); -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); - -void ARGBToBayerRow_C(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); -void ARGBToBayerRow_SSSE3(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); -void ARGBToBayerRow_NEON(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index 18098798b..7ced534b5 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -66,8 +66,8 @@ int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, // Legacy API. Deprecated. LIBYUV_API -int ScaleOffset(const uint8* src, int src_width, int src_height, - uint8* dst, int dst_width, int dst_height, int dst_yoffset, +int ScaleOffset(const uint8* src_i420, int src_width, int src_height, + uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset, bool interpolate); // For testing, allow disabling of specialized scalers. 
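
Illustrative usage sketch (not part of the patch): the ARGBToNV12 and ARGBToYUY2 entry points declared in convert_from_argb.h above could be called as below. The wrapper names, the assumption of a packed ARGB buffer with no row padding, and the even frame size are hypothetical; strides follow the convention used throughout these headers (bytes per row, so ARGB is width * 4 and YUY2 is width * 2).

#include <stdint.h>
#include "libyuv/convert_from_argb.h"

// Hypothetical helper: convert a packed, unpadded ARGB frame to NV12.
// Assumes width and height are even; dst_y is width x height bytes and
// dst_uv is width x (height / 2) bytes of interleaved UV.
// Returns 0 on success, -1 on bad arguments.
int ArgbFrameToNV12(const uint8_t* argb, int width, int height,
                    uint8_t* dst_y, uint8_t* dst_uv) {
  return libyuv::ARGBToNV12(argb, width * 4,   // ARGB stride in bytes.
                            dst_y, width,      // Y plane, 1 byte per pixel.
                            dst_uv, width,     // UV plane, half height.
                            width, height);
}

// Hypothetical helper: same frame to packed YUY2 (2 bytes per pixel).
int ArgbFrameToYUY2(const uint8_t* argb, int width, int height,
                    uint8_t* dst_yuy2) {
  return libyuv::ARGBToYUY2(argb, width * 4, dst_yuy2, width * 2,
                            width, height);
}
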
diff --git a/include/libyuv/version.h b/include/libyuv/version.h index f4dfeef6e..d485c0f21 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 470 +#define LIBYUV_VERSION 471 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index 02e56ecbc..5efefc3a3 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -533,11 +533,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = YUY2ToYRow_C; #if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; @@ -550,12 +548,10 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, } } #elif defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width > 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + YUY2ToYRow = YUY2ToYRow_Any_NEON; + if (width >= 16) { + YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; } if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; @@ -656,11 +652,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, YUY2ToYRow = YUY2ToYRow_C; YUY2ToUVRow = YUY2ToUVRow_C; #if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2; YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; @@ -673,12 +667,10 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, } } #elif defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width > 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + YUY2ToYRow = YUY2ToYRow_Any_NEON; + if (width >= 16) { + YUY2ToUVRow = YUY2ToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; @@ -723,11 +715,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, UYVYToYRow = UYVYToYRow_C; UYVYToUVRow = UYVYToUVRow_C; #if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_SSE2; + UYVYToYRow = UYVYToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2; UYVYToYRow = UYVYToYRow_Unaligned_SSE2; @@ -740,12 +730,10 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, } } #elif defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { UYVYToYRow = 
UYVYToYRow_NEON; @@ -827,10 +815,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 2 * 2 > kMaxStride) { // 2 rows of UYVY are required. - return -1; - } else if (!src_v210 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_v210 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 2 * 2 > kMaxStride) { return -1; } // Negative height means invert the image. @@ -858,12 +845,10 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, } } #elif defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_NEON; @@ -873,11 +858,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, #endif #if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_SSE2; + UYVYToYRow = UYVYToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_Unaligned_SSE2; UYVYToUVRow = UYVYToUVRow_SSE2; @@ -887,12 +870,10 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, } } #elif defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_NEON; @@ -920,6 +901,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, return 0; } +// Convert ARGB to I420. LIBYUV_API int ARGBToI420(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, @@ -942,11 +924,9 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; @@ -959,10 +939,8 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, } } #elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } @@ -985,6 +963,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, return 0; } +// Convert BGRA to I420. 
LIBYUV_API int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, uint8* dst_y, int dst_stride_y, @@ -1002,18 +981,14 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } - void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix); void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); - - BGRAToYRow = BGRAToYRow_C; - BGRAToUVRow = BGRAToUVRow_C; + uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C; + void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) = + BGRAToYRow_C; #if defined(HAS_BGRATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - BGRAToYRow = BGRAToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; + BGRAToYRow = BGRAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3; BGRAToYRow = BGRAToYRow_Unaligned_SSSE3; @@ -1025,6 +1000,13 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, } } } +#elif defined(HAS_BGRATOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + BGRAToYRow = BGRAToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + BGRAToYRow = BGRAToYRow_NEON; + } + } #endif for (int y = 0; y < height - 1; y += 2) { @@ -1043,6 +1025,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, return 0; } +// Convert ABGR to I420. LIBYUV_API int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, uint8* dst_y, int dst_stride_y, @@ -1060,18 +1043,14 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } - void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix); void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); - - ABGRToYRow = ABGRToYRow_C; - ABGRToUVRow = ABGRToUVRow_C; + uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) = + ABGRToYRow_C; #if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - ABGRToYRow = ABGRToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + ABGRToYRow = ABGRToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3; ABGRToYRow = ABGRToYRow_Unaligned_SSSE3; @@ -1083,6 +1062,13 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, } } } +#elif defined(HAS_ABGRTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ABGRToYRow = ABGRToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ABGRToYRow = ABGRToYRow_NEON; + } + } #endif for (int y = 0; y < height - 1; y += 2) { @@ -1101,6 +1087,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, return 0; } +// Convert RGBA to I420. 
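For these 4:2:0 converters only the Y row gains a NEON kernel in this patch (BGRAToYRow_NEON above, and likewise ABGR and RGBA below); the UV rows keep their C or SSSE3 paths. The surrounding loop is the same everywhere: two source rows per iteration feed one shared chroma row, and an odd final row is averaged with itself by passing a stride of 0. A compilable sketch of that loop shape follows; ToYRow, ToUVRow and ArgbLikeToI420Sketch are local stand-ins whose channel picks are placeholders, not the real colour math.

#include <stdint.h>

// Placeholder row kernels: the channel picks below are NOT the real colour
// math, they only keep the example compilable and short.
static void ToYRow(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  for (int x = 0; x < width; ++x) dst_y[x] = src_argb[x * 4 + 1];
}
static void ToUVRow(const uint8_t* src0, int src_stride,
                    uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* src1 = src0 + src_stride;  // second row (or the same row).
  for (int x = 0; x < width - 1; x += 2) {
    dst_u[x >> 1] = (src0[x * 4 + 0] + src0[x * 4 + 4] +
                     src1[x * 4 + 0] + src1[x * 4 + 4]) >> 2;
    dst_v[x >> 1] = (src0[x * 4 + 2] + src0[x * 4 + 6] +
                     src1[x * 4 + 2] + src1[x * 4 + 6]) >> 2;
  }
}

void ArgbLikeToI420Sketch(const uint8_t* src, int src_stride,
                          uint8_t* dst_y, int dst_stride_y,
                          uint8_t* dst_u, int dst_stride_u,
                          uint8_t* dst_v, int dst_stride_v,
                          int width, int height) {
  for (int y = 0; y < height - 1; y += 2) {
    ToUVRow(src, src_stride, dst_u, dst_v, width);  // one UV row per row pair.
    ToYRow(src, dst_y, width);
    ToYRow(src + src_stride, dst_y + dst_stride_y, width);
    src += src_stride * 2;
    dst_y += dst_stride_y * 2;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  if (height & 1) {            // odd height: stride 0 averages the row with itself.
    ToUVRow(src, 0, dst_u, dst_v, width);
    ToYRow(src, dst_y, width);
  }
}

ABGRToI420 and RGBAToI420 below follow the same shape.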
LIBYUV_API int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, uint8* dst_y, int dst_stride_y, @@ -1118,18 +1105,14 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } - void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix); void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); - - RGBAToYRow = RGBAToYRow_C; - RGBAToUVRow = RGBAToUVRow_C; + uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C; + void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) = + RGBAToYRow_C; #if defined(HAS_RGBATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; - RGBAToYRow = RGBAToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + RGBAToUVRow = RGBAToUVRow_Any_SSSE3; + RGBAToYRow = RGBAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3; RGBAToYRow = RGBAToYRow_Unaligned_SSSE3; @@ -1141,6 +1124,13 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, } } } +#elif defined(HAS_RGBATOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGBAToYRow = RGBAToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGBAToYRow = RGBAToYRow_NEON; + } + } #endif for (int y = 0; y < height - 1; y += 2) { @@ -1159,18 +1149,17 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, return 0; } +// Convert RGB24 to I420. LIBYUV_API int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_rgb24 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. 
if (height < 0) { @@ -1179,44 +1168,71 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, src_stride_rgb24 = -src_stride_rgb24; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - RGB24ToARGBRow = RGB24ToARGBRow_C; + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + RGB24ToARGBRow_C; #if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - TestReadSafe(src_rgb24, src_stride_rgb24, width, height, 3, 48)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#elif defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_RGB24TOYROW_NEON) + void (*RGB24ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + RGB24ToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB24ToYRow = RGB24ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToYRow = RGB24ToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_RGB24TOYROW_NEON for (int y = 0; y < height - 1; y += 2) { RGB24ToARGBRow(src_rgb24, row, width); RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_RGB24TOYROW_NEON) + RGB24ToYRow(src_rgb24, dst_y, width); + RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -1225,23 +1241,27 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, if (height & 1) { RGB24ToARGBRow_C(src_rgb24, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_RGB24TOYROW_NEON) + RGB24ToYRow(src_rgb24, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } +// Convert RAW to I420. +// Same as RGB24 but RGB vs BGR LIBYUV_API int RAWToI420(const uint8* src_raw, int src_stride_raw, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. 
+ if (!src_raw || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_raw || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. if (height < 0) { @@ -1250,44 +1270,71 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, src_stride_raw = -src_stride_raw; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - RAWToARGBRow = RAWToARGBRow_C; + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - TestReadSafe(src_raw, src_stride_raw, width, height, 3, 48)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } + } +#elif defined(HAS_RAWTOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RAWToARGBRow = RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_RAWTOYROW_NEON) + void (*RAWToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + RAWToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RAWToYRow = RAWToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToYRow = RAWToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_RAWTOYROW_NEON for (int y = 0; y < height - 1; y += 2) { RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw + src_stride_raw, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_RAWTOYROW_NEON) + RAWToYRow(src_raw, dst_y, width); + RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -1296,22 +1343,25 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, if (height & 1) { RAWToARGBRow_C(src_raw, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_RAWTOYROW_NEON) + RAWToYRow(src_raw, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } +// Convert RGB565 to I420. 
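RGB24ToI420 and RAWToI420 keep the ARGB row buffer (SIMD_ALIGNED row[kMaxStride * 2], hence the width * 4 > kMaxStride guard) for the UV pass, while on NEON the Y pass now reads the packed source directly through the new RGB24ToYRow_NEON / RAWToYRow_NEON kernels. The sketch below shows the buffered path for the Y plane only; kMaxWidth, Rgb24ToArgbRow, ArgbToYRow and the luma constants are illustrative stand-ins, not the libyuv definitions.

#include <stdint.h>

enum { kMaxWidth = 1024 };  // stand-in for kMaxStride / 4 in the real code.

// Expand one packed RGB24 row (B, G, R in memory) to ARGB (B, G, R, A).
static void Rgb24ToArgbRow(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) {
    dst[0] = src[0];   // B
    dst[1] = src[1];   // G
    dst[2] = src[2];   // R
    dst[3] = 255;      // A
    src += 3;
    dst += 4;
  }
}

// BT.601-style integer luma; constants are illustrative, the real kernels
// live in row_common.cc / row_neon.cc.
static void ArgbToYRow(const uint8_t* src, uint8_t* dst_y, int width) {
  for (int x = 0; x < width; ++x, src += 4) {
    dst_y[x] = (uint8_t)((66 * src[2] + 129 * src[1] + 25 * src[0] + 0x1080) >> 8);
  }
}

void Rgb24ToYPlaneSketch(const uint8_t* src_rgb24, int src_stride,
                         uint8_t* dst_y, int dst_stride_y,
                         int width, int height) {
  if (width > kMaxWidth) return;  // mirrors the width * 4 > kMaxStride check.
  uint8_t row[kMaxWidth * 4];     // one expanded ARGB row on the stack.
  for (int y = 0; y < height; ++y) {
    Rgb24ToArgbRow(src_rgb24, row, width);
    ArgbToYRow(row, dst_y, width);
    src_rgb24 += src_stride;
    dst_y += dst_stride_y;
  }
}

RGB565ToI420 below repeats the pattern, using RGB565ToARGBRow for the buffered UV pass and RGB565ToYRow_NEON for the direct Y path.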
LIBYUV_API int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. - return -1; - } else if (!src_rgb565 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_rgb565 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; } // Negative height means invert the image. @@ -1321,44 +1371,71 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, src_stride_rgb565 = -src_stride_rgb565; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - RGB565ToARGBRow = RGB565ToARGBRow_C; + void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + RGB565ToARGBRow_C; #if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - TestReadSafe(src_rgb565, src_stride_rgb565, width, height, 2, 16)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } + } +#elif defined(HAS_RGB565TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_RGB565TOYROW_NEON) + void (*RGB565ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + RGB565ToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB565ToYRow = RGB565ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB565ToYRow = RGB565ToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_RGB565TOYROW_NEON for (int y = 0; y < height - 1; y += 2) { RGB565ToARGBRow(src_rgb565, row, width); RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_RGB565TOYROW_NEON) + RGB565ToYRow(src_rgb565, dst_y, width); + RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_rgb565 += src_stride_rgb565 * 2; dst_y 
+= dst_stride_y * 2; dst_u += dst_stride_u; @@ -1367,7 +1444,11 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, if (height & 1) { RGB565ToARGBRow_C(src_rgb565, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_RGB565TOYROW_NEON) + RGB565ToYRow(src_rgb565, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } @@ -1378,12 +1459,10 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. + if (!src_argb1555 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_argb1555 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. if (height < 0) { @@ -1409,10 +1488,8 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ARGBToYRow = ARGBToYRow_C; ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; @@ -1450,12 +1527,10 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. + if (!src_argb4444 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_argb4444 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. if (height < 0) { @@ -1481,10 +1556,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, ARGBToYRow = ARGBToYRow_C; ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 7d6c26205..2c0d2dd4f 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -405,8 +405,8 @@ int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba, // Convert RGB24 to ARGB. 
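In convert_argb.cc the packed-RGB to ARGB converters stop requiring IS_ALIGNED(width, N) up front and instead accept any width >= N through an _Any_ wrapper. Those wrappers, generated by the RGBANY/YANY macros later in this patch, either mask off the ragged tail and finish it in C or re-run the SIMD kernel over the last, overlapping vector. A sketch of the masking strategy follows; Kernel_SIMD, Kernel_C and Kernel_Any are stand-in names.

#include <stdint.h>

// Placeholder kernels; in libyuv these are the *_SSSE3 / *_NEON bodies and
// their *_C counterparts.
static void Kernel_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = src[x];
}
static void Kernel_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = src[x];
}

// The _Any_ idea: whole 16-pixel vectors via SIMD, the 0..15 leftover pixels
// via C. Source and destination bytes per pixel are both 1 here for brevity.
void Kernel_Any(const uint8_t* src, uint8_t* dst, int width) {
  const int kMask = 15;
  int n = width & ~kMask;
  Kernel_SIMD(src, dst, n);
  Kernel_C(src + n, dst + n, width & kMask);
}

The RGB24ToARGB, RAWToARGB and RGB565ToARGB hunks below all switch to this gate.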
LIBYUV_API int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { + uint8* dst_argb, int dst_stride_argb, + int width, int height) { if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -417,16 +417,22 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } - void (*RGB24ToARGBRow)(const uint8* src_rgb24, uint8* dst_argb, int pix) = + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = RGB24ToARGBRow_C; #if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } } #elif defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } } #endif @@ -441,8 +447,8 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, // Convert RAW to ARGB. LIBYUV_API int RAWToARGB(const uint8* src_raw, int src_stride_raw, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { + uint8* dst_argb, int dst_stride_argb, + int width, int height) { if (!src_raw || !dst_argb || width <= 0 || height == 0) { return -1; @@ -453,16 +459,22 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } - void (*RAWToARGBRow)(const uint8* src_raw, uint8* dst_argb, int pix) = + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } } #elif defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RAWToARGBRow = RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; + } } #endif @@ -492,10 +504,19 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) = RGB565ToARGBRow_C; #if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 8) && + if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } + } +#elif defined(HAS_RGB565TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_NEON; + } } #endif diff --git a/source/convert_from.cc b/source/convert_from.cc index 86d3c387f..ff1fb315f 100644 
--- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -265,25 +265,25 @@ LIBYUV_API int I422ToYUY2(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_yuy2, int dst_stride_yuy2, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; } void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; @@ -299,11 +299,11 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height; ++y) { - I422ToYUY2Row(src_y, src_u, src_y, dst_frame, width); + I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame; + dst_yuy2 += dst_stride_yuy2; } return 0; } @@ -312,25 +312,25 @@ LIBYUV_API int I420ToYUY2(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_yuy2, int dst_stride_yuy2, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; } void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; @@ -346,16 +346,16 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height - 1; y += 2) { - I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width); + I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); I422ToYUY2Row(src_y + src_stride_y, src_u, src_v, - dst_frame + dst_stride_frame, width); + dst_yuy2 + dst_stride_yuy2, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; + dst_yuy2 += dst_stride_yuy2 * 2; } if (height & 1) { - I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width); + I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); } return 0; } @@ -365,25 +365,25 @@ LIBYUV_API int I422ToUYVY(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_uyvy, int dst_stride_uyvy, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; + dst_stride_uyvy = -dst_stride_uyvy; } void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; @@ -399,11 +399,11 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height; ++y) { - I422ToUYVYRow(src_y, src_u, src_y, dst_frame, width); + I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame; + dst_uyvy += dst_stride_uyvy; } return 0; } @@ -412,25 +412,25 @@ LIBYUV_API int I420ToUYVY(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_uyvy, int dst_stride_uyvy, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; + dst_stride_uyvy = -dst_stride_uyvy; } void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; @@ -446,16 +446,16 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height - 1; y += 2) { - I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width); + I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); I422ToUYVYRow(src_y + src_stride_y, src_u, src_v, - dst_frame + dst_stride_frame, width); + dst_uyvy + dst_stride_uyvy, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; + dst_uyvy += dst_stride_uyvy * 2; } if (height & 1) { - I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width); + I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); } return 0; } @@ -464,35 +464,35 @@ LIBYUV_API int I420ToV210(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_v210, int dst_stride_v210, int width, int height) { if (width * 16 / 6 > kMaxStride || - !src_y || !src_u || !src_v || !dst_frame || + !src_y || !src_u || !src_v || !dst_v210 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_v210 = dst_v210 + (height - 1) * dst_stride_v210; + dst_stride_v210 = -dst_stride_v210; } SIMD_ALIGNED(uint8 row[kMaxStride]); for (int y = 0; y < height - 1; y += 2) { I422ToUYVYRow_C(src_y, src_u, src_v, row, width); - UYVYToV210Row_C(row, dst_frame, width); + UYVYToV210Row_C(row, dst_v210, width); I422ToUYVYRow_C(src_y + src_stride_y, src_u, src_v, row, width); - UYVYToV210Row_C(row, dst_frame + dst_stride_frame, width); + UYVYToV210Row_C(row, dst_v210 + dst_stride_v210, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; + dst_v210 += dst_stride_v210 * 2; } if (height & 1) { I422ToUYVYRow_C(src_y, src_u, src_v, row, width); - UYVYToV210Row_C(row, dst_frame, width); + UYVYToV210Row_C(row, dst_v210, width); } return 0; } @@ -521,7 +521,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, int halfwidth = (width + 1) >> 1; void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width) = MergeUV_C; -#if defined(HAS_SPLITUV_SSE2) +#if defined(HAS_MERGEUV_SSE2) if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { MergeUV = MergeUV_Any_SSE2; if (IS_ALIGNED(halfwidth, 16)) { @@ -534,7 +534,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_SPLITUV_AVX2) +#if defined(HAS_MERGEUV_AVX2) if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { MergeUV = MergeUV_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { @@ -547,7 +547,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_SPLITUV_NEON) +#if defined(HAS_MERGEUV_NEON) if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { MergeUV = MergeUV_Any_NEON; if (IS_ALIGNED(halfwidth, 16)) { diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index cd7186592..9dedde006 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -21,6 +21,522 @@ namespace libyuv { extern "C" { #endif +// ARGB little endian (bgra in memory) to I444 +LIBYUV_API +int ARGBToI444(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToUV444Row_C(src_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + return 0; +} + +// ARGB little endian (bgra in memory) to I422 +LIBYUV_API +int ARGBToI422(const uint8* src_argb, 
int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, + int pix) = ARGBToUV422Row_C; +#if defined(HAS_ARGBTOUV422ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToUV422Row = ARGBToUV422Row_SSSE3; + } + } + } +#endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToUV422Row(src_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + return 0; +} + +// ARGB little endian (bgra in memory) to I411 +LIBYUV_API +int ARGBToI411(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToUV411Row_C(src_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + return 0; +} + +LIBYUV_API +int ARGBToNV12(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_uv, int dst_stride_uv, + int width, int height) { + if (!src_argb || + !dst_y || !dst_uv || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + int halfwidth = (width + 1) >> 1; + void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, + int width) = MergeUV_C; +#if defined(HAS_MERGEUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { + MergeUV = MergeUV_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_SSE2; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_SSE2; + } + } + } +#endif +#if defined(HAS_MERGEUV_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { + MergeUV = MergeUV_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUV = MergeUV_Unaligned_AVX2; + if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { + MergeUV = MergeUV_AVX2; + } + } + } +#endif +#if defined(HAS_MERGEUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { + MergeUV = MergeUV_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_NEON; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_NEON; + } + } + } +#endif + + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height - 1; y += 2) { + ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); + MergeUV(row_u, row_v, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + MergeUV(row_u, row_v, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width); + } + return 0; +} + +// Same as NV12 but U and V swapped. +LIBYUV_API +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_uv, int dst_stride_uv, + int width, int height) { + if (!src_argb || + !dst_y || !dst_uv || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + int halfwidth = (width + 1) >> 1; + void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, + int width) = MergeUV_C; +#if defined(HAS_MERGEUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { + MergeUV = MergeUV_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_SSE2; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_SSE2; + } + } + } +#endif +#if defined(HAS_MERGEUV_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { + MergeUV = MergeUV_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUV = MergeUV_Unaligned_AVX2; + if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { + MergeUV = MergeUV_AVX2; + } + } + } +#endif +#if defined(HAS_MERGEUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { + MergeUV = MergeUV_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_NEON; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_NEON; + } + } + } +#endif + + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height - 1; y += 2) { + ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); + MergeUV(row_v, row_u, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + MergeUV(row_v, row_u, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width); + } + return 0; +} + +// Convert ARGB to YUY2. +LIBYUV_API +int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, + uint8* dst_yuy2, int dst_stride_yuy2, + int width, int height) { + if (!src_argb || !dst_yuy2 || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; + } + + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_yuy2, int width) = + I422ToYUY2Row_C; +#if defined(HAS_I422TOYUY2ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && + IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) { + I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_SSE2; + } + } +#elif defined(HAS_I422TOYUY2ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + I422ToYUY2Row = I422ToYUY2Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_NEON; + } + } +#endif + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height; ++y) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + ARGBToYRow(src_argb, row_y, width); + I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); + src_argb += src_stride_argb; + dst_yuy2 += dst_stride_yuy2; + } + return 0; +} + +// Convert ARGB to UYVY. +LIBYUV_API +int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, + uint8* dst_uyvy, int dst_stride_uyvy, + int width, int height) { + if (!src_argb || !dst_uyvy || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; + dst_stride_uyvy = -dst_stride_uyvy; + } + + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_uyvy, int width) = + I422ToUYVYRow_C; +#if defined(HAS_I422TOUYVYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && + IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) { + I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_SSE2; + } + } +#elif defined(HAS_I422TOUYVYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + I422ToUYVYRow = I422ToUYVYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_NEON; + } + } +#endif + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height; ++y) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + ARGBToYRow(src_argb, row_y, width); + I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); + src_argb += src_stride_argb; + dst_uyvy += dst_stride_uyvy; + } + return 0; +} + // Convert ARGB to I400. 
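ARGBToYUY2 and ARGBToUYVY above build one row of planar Y, U and V in stack buffers and then hand them to the I422To*Row packers; the only difference between the two outputs is the byte order within each pixel pair. A sketch of that final packing step follows (odd-width tails omitted; PackYuy2Sketch and PackUyvySketch are stand-in names, with uint8_t standing in for libyuv's uint8).

#include <stdint.h>

// YUY2: Y0 U Y1 V per pixel pair.  UYVY: U Y0 V Y1.
void PackYuy2Sketch(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                    uint8_t* dst, int width) {
  for (int x = 0; x < width - 1; x += 2) {
    dst[0] = y[0];
    dst[1] = u[0];
    dst[2] = y[1];
    dst[3] = v[0];
    y += 2; u += 1; v += 1; dst += 4;
  }
}

void PackUyvySketch(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                    uint8_t* dst, int width) {
  for (int x = 0; x < width - 1; x += 2) {
    dst[0] = u[0];
    dst[1] = y[0];
    dst[2] = v[0];
    dst[3] = y[1];
    y += 2; u += 1; v += 1; dst += 4;
  }
}

ARGBToI400 below needs only the Y pass, so its change is just the simplified kernel dispatch.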
LIBYUV_API int ARGBToI400(const uint8* src_argb, int src_stride_argb, @@ -37,10 +553,8 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && @@ -50,10 +564,8 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, } } #elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } @@ -68,64 +580,6 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, return 0; } -// ARGB little endian (bgra in memory) to I422 -// same as I420 except UV plane is full height -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} // Convert ARGB to RGBA. 
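The ARGBToI422 removed here reused the 4:2:0 ARGBToUVRow with a stride of 0; its replacement, added earlier in this patch, keeps the chroma plane at full height and subsamples horizontally only, through the dedicated ARGBToUV422Row. A sketch of that averaging follows; the V constants match the RGBToV helper visible later in this patch, while the U constants are the usual mirrored set and should be read as illustrative.

#include <stdint.h>

// Horizontal-only 4:2:2 chroma subsampling from ARGB (B, G, R, A in memory).
void ArgbToUv422Sketch(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v,
                       int width) {
  for (int x = 0; x < width - 1; x += 2) {
    int b = (src_argb[0] + src_argb[4]) >> 1;  // average two neighbouring pixels;
    int g = (src_argb[1] + src_argb[5]) >> 1;  // no vertical averaging, so the
    int r = (src_argb[2] + src_argb[6]) >> 1;  // chroma plane stays full height.
    dst_u[0] = (uint8_t)(((112 * b - 74 * g - 38 * r + 128) >> 8) + 128);
    dst_v[0] = (uint8_t)(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128);
    src_argb += 8;
    dst_u += 1;
    dst_v += 1;
  }
}

The remaining hunks below apply the same dispatch cleanup to format_conversion.cc and rename the MirrorRowUV kernels to MirrorUVRow.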
LIBYUV_API int ARGBToRGBA(const uint8* src_argb, int src_stride_argb, diff --git a/source/format_conversion.cc b/source/format_conversion.cc index 95ce4713e..e1ae5c4c4 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -268,7 +268,7 @@ int BayerToARGB(const uint8* src_bayer, int src_stride_bayer, dst_argb += dst_stride_argb * 2; } if (height & 1) { - BayerRow0(src_bayer, -src_stride_bayer, dst_argb, width); + BayerRow0(src_bayer, src_stride_bayer, dst_argb, width); } return 0; } @@ -305,11 +305,9 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; @@ -319,10 +317,8 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, } } #elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b5d4ffa57..7225c06ac 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -617,7 +617,7 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, dst_stride_rgb565 = -dst_stride_rgb565; } void (*NV21ToRGB565Row)(const uint8* y_buf, - const uint8* vu_buf, + const uint8* src_vu, uint8* rgb_buf, int width) = NV21ToRGB565Row_C; #if defined(HAS_NV21TORGB565ROW_SSSE3) diff --git a/source/rotate.cc b/source/rotate.cc index 0601dec07..2d312f886 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -45,7 +45,7 @@ extern "C" { #define HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width); #define HAS_MIRRORROW_UV_NEON -void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); +void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); #define HAS_TRANSPOSE_WX8_NEON void TransposeWx8_NEON(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); @@ -1049,21 +1049,21 @@ void RotateUV180(const uint8* src, int src_stride, uint8* dst_b, int dst_stride_b, int width, int height) { void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = - MirrorRowUV_C; + MirrorUVRow_C; #if defined(HAS_MIRRORROW_UV_NEON) if (TestCpuFlag(kCpuHasNEON)) { - MirrorRowUV = MirrorRowUV_NEON; + MirrorRowUV = MirrorUVRow_NEON; } #elif defined(HAS_MIRRORROW_UV_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - MirrorRowUV = MirrorRowUV_SSSE3; + MirrorRowUV = MirrorUVRow_SSSE3; } -#elif defined(HAS_MIRRORROWUV_MIPS_DSPR2) +#elif defined(HAS_MirrorUVRow_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - MirrorRowUV = MirrorRowUV_MIPS_DSPR2; + MirrorRowUV = MirrorUVRow_MIPS_DSPR2; } #endif diff --git a/source/row_any.cc b/source/row_any.cc index 1efc5572f..4dea57ff7 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -121,12 +121,12 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 
NV21ToRGB565Row_C, 0, 2) // NEON RGB24 is multiple of 8 pixels, unaligned source and destination. // I400 To ARGB does multiple of 8 pixels with SIMD and remainder with C. #define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \ - void NAMEANY(const uint8* argb_buf, \ - uint8* rgb_buf, \ + void NAMEANY(const uint8* src, \ + uint8* dst, \ int width) { \ int n = width & ~MASK; \ - ARGBTORGB_SIMD(argb_buf, rgb_buf, n); \ - ARGBTORGB_C(argb_buf + n * SBPP, rgb_buf + n * BPP, width & MASK); \ + ARGBTORGB_SIMD(src, dst, n); \ + ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \ } #if defined(HAS_ARGBTORGB24ROW_SSSE3) @@ -167,30 +167,37 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, // RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD. // TODO(fbarchard): Use last 16 method for all unsubsampled conversions. -#define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \ +#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \ void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \ - ARGBTOY_SIMD(src_argb + (width - NUM) * BPP, dst_y + (width - NUM), NUM);\ + ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \ + dst_y + (width - NUM) * BPP, NUM); \ } #ifdef HAS_ARGBTOYROW_SSSE3 -YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 16) -YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 16) -YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 16) -#endif -#ifdef HAS_RGBATOYROW_SSSE3 -YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 16) +YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16) +YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16) +YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16) +YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16) +YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8) #endif #ifdef HAS_ARGBTOYROW_NEON -YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 8) -#endif -#ifdef HAS_YUY2TOYROW_SSE2 -YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 16) -YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 16) -#endif -#ifdef HAS_YUY2TOYROW_NEON -YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 16) -YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16) +YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8) +YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8) +YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8) +YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8) +YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8) +YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8) +YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8) +YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16) +YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16) +YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8) +YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8) +YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8) #endif #undef YANY @@ -201,17 +208,15 @@ YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16) int n = width & ~15; \ ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \ ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \ - dst_u + (n >> 1), \ - dst_v + (n >> 1), \ - width & 15); \ + dst_u + (n >> 1), \ + dst_v + (n >> 1), \ + width & 15); \ } #ifdef 
HAS_ARGBTOUVROW_SSSE3 UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4) UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4) UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4) -#endif -#ifdef HAS_RGBATOYROW_SSSE3 UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4) #endif #ifdef HAS_YUY2TOUVROW_SSE2 @@ -230,11 +235,15 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2) int n = width & ~15; \ ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \ ANYTOUV_C(src_uv + n * BPP, \ - dst_u + (n >> 1), \ - dst_v + (n >> 1), \ - width & 15); \ + dst_u + (n >> 1), \ + dst_v + (n >> 1), \ + width & 15); \ } +#ifdef HAS_ARGBTOUVROW_SSSE3 +UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3, + ARGBToUV422Row_C, 4) +#endif #ifdef HAS_YUY2TOUV422ROW_SSE2 UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, YUY2ToUV422Row_C, 2) diff --git a/source/row_common.cc b/source/row_common.cc index 5ab935a89..aad97b001 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -95,47 +95,47 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { } } -void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { +void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 b = src_rgb[0] & 0x1f; - uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x07) << 3); - uint8 r = src_rgb[1] >> 3; + uint8 b = src_rgb565[0] & 0x1f; + uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8 r = src_rgb565[1] >> 3; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 2) | (g >> 4); dst_argb[2] = (r << 3) | (r >> 2); dst_argb[3] = 255u; dst_argb += 4; - src_rgb += 2; + src_rgb565 += 2; } } -void ARGB1555ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { +void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 b = src_rgb[0] & 0x1f; - uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x03) << 3); - uint8 r = (src_rgb[1] & 0x7c) >> 2; - uint8 a = src_rgb[1] >> 7; + uint8 b = src_argb1555[0] & 0x1f; + uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8 r = (src_argb1555[1] & 0x7c) >> 2; + uint8 a = src_argb1555[1] >> 7; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 3) | (g >> 2); dst_argb[2] = (r << 3) | (r >> 2); dst_argb[3] = -a; dst_argb += 4; - src_rgb += 2; + src_argb1555 += 2; } } -void ARGB4444ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { +void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 b = src_rgb[0] & 0x0f; - uint8 g = src_rgb[0] >> 4; - uint8 r = src_rgb[1] & 0x0f; - uint8 a = src_rgb[1] >> 4; + uint8 b = src_argb4444[0] & 0x0f; + uint8 g = src_argb4444[0] >> 4; + uint8 r = src_argb4444[1] & 0x0f; + uint8 a = src_argb4444[1] >> 4; dst_argb[0] = (b << 4) | b; dst_argb[1] = (g << 4) | g; dst_argb[2] = (r << 4) | r; dst_argb[3] = (a << 4) | a; dst_argb += 4; - src_rgb += 2; + src_argb4444 += 2; } } @@ -265,11 +265,11 @@ static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128; } -#define MAKEROWY(NAME, R, G, B) \ +#define MAKEROWY(NAME, R, G, B, BPP) \ void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ for (int x = 0; x < width; ++x) { \ dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += 4; \ + 
src_argb0 += BPP; \ dst_y += 1; \ } \ } \ @@ -277,16 +277,16 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ uint8* dst_u, uint8* dst_v, int width) { \ const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ for (int x = 0; x < width - 1; x += 2) { \ - uint8 ab = (src_rgb0[B] + src_rgb0[B + 4] + \ - src_rgb1[B] + src_rgb1[B + 4]) >> 2; \ - uint8 ag = (src_rgb0[G] + src_rgb0[G + 4] + \ - src_rgb1[G] + src_rgb1[G + 4]) >> 2; \ - uint8 ar = (src_rgb0[R] + src_rgb0[R + 4] + \ - src_rgb1[R] + src_rgb1[R + 4]) >> 2; \ + uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ + src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ + uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ + src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \ + uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ + src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ dst_u[0] = RGBToU(ar, ag, ab); \ dst_v[0] = RGBToV(ar, ag, ab); \ - src_rgb0 += 8; \ - src_rgb1 += 8; \ + src_rgb0 += BPP * 2; \ + src_rgb1 += BPP * 2; \ dst_u += 1; \ dst_v += 1; \ } \ @@ -299,10 +299,95 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ } \ } -MAKEROWY(ARGB, 2, 1, 0) -MAKEROWY(BGRA, 1, 2, 3) -MAKEROWY(ABGR, 0, 1, 2) -MAKEROWY(RGBA, 3, 2, 1) +MAKEROWY(ARGB, 2, 1, 0, 4) +MAKEROWY(BGRA, 1, 2, 3, 4) +MAKEROWY(ABGR, 0, 1, 2, 4) +MAKEROWY(RGBA, 3, 2, 1, 4) +MAKEROWY(RGB24, 2, 1, 0, 3) +MAKEROWY(RAW, 0, 1, 2, 3) +#undef MAKEROWY + +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { + for (int x = 0; x < width; ++x) { + uint8 b = src_rgb565[0] & 0x1f; + uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8 r = src_rgb565[1] >> 3; + b = (b << 3) | (b >> 2); + g = (g << 2) | (g >> 4); + r = (r << 3) | (r >> 2); + dst_y[0] = RGBToY(r, g, b); + src_rgb565 += 2; + dst_y += 1; + } +} + +void ARGBToUV444Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width) { + for (int x = 0; x < width; ++x) { + uint8 ab = src_argb[0]; + uint8 ag = src_argb[1]; + uint8 ar = src_argb[2]; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + src_argb += 4; + dst_u += 1; + dst_v += 1; + } +} + +void ARGBToUV422Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width) { + for (int x = 0; x < width - 1; x += 2) { + uint8 ab = (src_argb[0] + src_argb[4]) >> 1; + uint8 ag = (src_argb[1] + src_argb[5]) >> 1; + uint8 ar = (src_argb[2] + src_argb[6]) >> 1; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + src_argb += 8; + dst_u += 1; + dst_v += 1; + } + if ((width & 3) == 1) { + uint8 ab = src_argb[0]; + uint8 ag = src_argb[1]; + uint8 ar = src_argb[2]; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } +} + +void ARGBToUV411Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width) { + for (int x = 0; x < width - 3; x += 4) { + uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; + uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; + uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + src_argb += 16; + dst_u += 1; + dst_v += 1; + } + if ((width & 3) == 3) { + uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3; + uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3; + uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } else if ((width & 3) == 2) { + uint8 ab = (src_argb[0] + src_argb[4]) >> 1; + uint8 ag = (src_argb[1] + src_argb[5]) >> 1; + uint8 
ar = (src_argb[2] + src_argb[6]) >> 1; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } else if ((width & 3) == 1) { + uint8 ab = src_argb[0]; + uint8 ag = src_argb[1]; + uint8 ar = src_argb[2]; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } +} // http://en.wikipedia.org/wiki/Grayscale. // 0.11 * B + 0.59 * G + 0.30 * R @@ -470,104 +555,104 @@ static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v, #if defined(__ARM_NEON__) // C mimic assembly. // TODO(fbarchard): Remove subsampling from Neon. -void I444ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I444ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - uint8 u = (u_buf[0] + u_buf[1] + 1) >> 1; - uint8 v = (v_buf[0] + v_buf[1] + 1) >> 1; - YuvPixel(y_buf[0], u, v, rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u, v, rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - u_buf += 2; - v_buf += 2; + uint8 u = (src_u[0] + src_u[1] + 1) >> 1; + uint8 v = (src_v[0] + src_v[1] + 1) >> 1; + YuvPixel(src_y[0], u, v, rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], u, v, rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + src_u += 2; + src_v += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); } } #else -void I444ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I444ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width; ++x) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0); - y_buf += 1; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf, 24, 16, 8, 0); + src_y += 1; + src_u += 1; + src_v += 1; rgb_buf += 4; // Advance 1 pixel. } } #endif // Also used for 420 -void I422ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); } } -void I422ToRGB24Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB24Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], + YuvPixel2(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 6; // Advance 2 pixels. 
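
Note on the MAKEROWY change above: the new BPP argument is what lets the same macro also generate the RGB24 and RAW rows, since those formats are 3 bytes per pixel instead of 4. As a rough guide, MAKEROWY(RGB24, 2, 1, 0, 3) expands to a per-pixel loop along the lines of the sketch below. The sketch is illustrative only; the diff context shows RGBToV but not RGBToY, so the RGBToY constants here are an assumption to verify against row_common.cc.

    typedef unsigned char uint8;

    // Assumed to match libyuv's static inline RGBToY (BT.601 studio range,
    // 8-bit fixed point; the +16 offset and rounding are folded into 0x1080).
    static int RGBToY_sketch(uint8 r, uint8 g, uint8 b) {
      return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
    }

    // Hand expansion of MAKEROWY(RGB24, 2, 1, 0, 3): R at byte 2, G at byte 1,
    // B at byte 0, 3 bytes per pixel.
    void RGB24ToYRow_C_sketch(const uint8* src_rgb24, uint8* dst_y, int width) {
      for (int x = 0; x < width; ++x) {
        dst_y[0] = RGBToY_sketch(src_rgb24[2], src_rgb24[1], src_rgb24[0]);
        src_rgb24 += 3;  // BPP = 3 for RGB24.
        dst_y += 1;
      }
    }
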
} if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); } } -void I422ToRAWRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRAWRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], + YuvPixel2(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 6; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); } } -void I422ToARGB4444Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB4444Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_argb4444, int width) { uint8 b0; @@ -577,8 +662,8 @@ void I422ToARGB4444Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -587,13 +672,13 @@ void I422ToARGB4444Row_C(const uint8* y_buf, r1 = r1 >> 4; *reinterpret_cast(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; dst_argb4444 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -602,9 +687,9 @@ void I422ToARGB4444Row_C(const uint8* y_buf, } } -void I422ToARGB1555Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB1555Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_argb1555, int width) { uint8 b0; @@ -614,8 +699,8 @@ void I422ToARGB1555Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -624,13 +709,13 @@ void I422ToARGB1555Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; dst_argb1555 += 4; // Advance 2 pixels. 
} if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -639,9 +724,9 @@ void I422ToARGB1555Row_C(const uint8* y_buf, } } -void I422ToRGB565Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB565Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_rgb565, int width) { uint8 b0; @@ -651,8 +736,8 @@ void I422ToRGB565Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -661,13 +746,13 @@ void I422ToRGB565Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -675,66 +760,66 @@ void I422ToRGB565Row_C(const uint8* y_buf, } } -void I411ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I411ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 3; x += 4) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); - YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0); - YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0); - y_buf += 4; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0); + YuvPixel(src_y[2], src_u[0], src_v[0], rgb_buf + 8, 24, 16, 8, 0); + YuvPixel(src_y[3], src_u[0], src_v[0], rgb_buf + 12, 24, 16, 8, 0); + src_y += 4; + src_u += 1; + src_v += 1; rgb_buf += 16; // Advance 4 pixels. } if (width & 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); } } -void NV12ToARGBRow_C(const uint8* y_buf, - const uint8* uv_buf, +void NV12ToARGBRow_C(const uint8* src_y, + const uint8* usrc_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], uv_buf[0], uv_buf[1], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - uv_buf += 2; + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], usrc_v[0], usrc_v[1], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + usrc_v += 2; rgb_buf += 8; // Advance 2 pixels. 
} if (width & 1) { - YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], rgb_buf + 0, 24, 16, 8, 0); } } -void NV21ToARGBRow_C(const uint8* y_buf, - const uint8* vu_buf, +void NV21ToARGBRow_C(const uint8* src_y, + const uint8* src_vu, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], vu_buf[1], vu_buf[0], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - vu_buf += 2; + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + src_vu += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, 24, 16, 8, 0); } } -void NV12ToRGB565Row_C(const uint8* y_buf, - const uint8* uv_buf, +void NV12ToRGB565Row_C(const uint8* src_y, + const uint8* usrc_v, uint8* dst_rgb565, int width) { uint8 b0; @@ -744,8 +829,8 @@ void NV12ToRGB565Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0); - YuvPixel2(y_buf[1], uv_buf[0], uv_buf[1], &b1, &g1, &r1); + YuvPixel2(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); + YuvPixel2(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -754,12 +839,12 @@ void NV12ToRGB565Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); - y_buf += 2; - uv_buf += 2; + src_y += 2; + usrc_v += 2; dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0); + YuvPixel2(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -767,8 +852,8 @@ void NV12ToRGB565Row_C(const uint8* y_buf, } } -void NV21ToRGB565Row_C(const uint8* y_buf, - const uint8* vu_buf, +void NV21ToRGB565Row_C(const uint8* src_y, + const uint8* vsrc_u, uint8* dst_rgb565, int width) { uint8 b0; @@ -778,8 +863,8 @@ void NV21ToRGB565Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], vu_buf[1], vu_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -788,12 +873,12 @@ void NV21ToRGB565Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); - y_buf += 2; - vu_buf += 2; + src_y += 2; + vsrc_u += 2; dst_rgb565 += 4; // Advance 2 pixels. 
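
The RGB565 writers above all use the same bit layout: blue in bits 0-4, green in bits 5-10, red in bits 11-15, two pixels packed per 32-bit store. A minimal scalar sketch of the pack and the matching unpack (the unpack mirrors RGB565ToARGBRow_C and the NEON RGB565TOARGB macro elsewhere in this patch; function names here are illustrative):

    #include <stdint.h>

    // Pack one 8-bit B/G/R triple into RGB565, using the same shifts as
    // I422ToRGB565Row_C and NV12ToRGB565Row_C above.
    static uint16_t PackRGB565(uint8_t b, uint8_t g, uint8_t r) {
      return (uint16_t)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
    }

    // Expand RGB565 back to 8 bits per channel by replicating the top bits
    // into the vacated low bits, so 0x1f maps to 0xff rather than 0xf8.
    static void UnpackRGB565(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r) {
      uint8_t b5 = p & 0x1f;
      uint8_t g6 = (p >> 5) & 0x3f;
      uint8_t r5 = (uint8_t)(p >> 11);
      *b = (uint8_t)((b5 << 3) | (b5 >> 2));
      *g = (uint8_t)((g6 << 2) | (g6 >> 4));
      *r = (uint8_t)((r5 << 3) | (r5 >> 2));
    }
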
} if (width & 1) { - YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -801,92 +886,92 @@ void NV21ToRGB565Row_C(const uint8* y_buf, } } -void YUY2ToARGBRow_C(const uint8* yuy2_buf, +void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(yuy2_buf[2], yuy2_buf[1], yuy2_buf[3], rgb_buf + 4, 24, 16, 8, 0); - yuy2_buf += 4; + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, 24, 16, 8, 0); + src_yuy2 += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, 24, 16, 8, 0); } } -void UYVYToARGBRow_C(const uint8* uyvy_buf, +void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(uyvy_buf[3], uyvy_buf[0], uyvy_buf[2], rgb_buf + 4, 24, 16, 8, 0); - uyvy_buf += 4; + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, 24, 16, 8, 0); + src_uyvy += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, 24, 16, 8, 0); } } -void I422ToBGRARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToBGRARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 8, 16, 24); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 0, 8, 16, 24); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf, 0, 8, 16, 24); } } -void I422ToABGRRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 0, 8, 16); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 0, 8, 16); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. 
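
The index arithmetic in YUY2ToARGBRow_C and UYVYToARGBRow_C above encodes the packed-422 byte order: a YUY2 macropixel is Y0,U,Y1,V and a UYVY macropixel is U,Y0,V,Y1, with the two luma samples sharing one chroma pair. A small sketch of that unpacking, for reference (helper names are illustrative):

    #include <stdint.h>

    // One 4-byte macropixel covers two horizontal pixels in both formats.
    static void UnpackYUY2(const uint8_t* p,
                           uint8_t* y0, uint8_t* y1, uint8_t* u, uint8_t* v) {
      *y0 = p[0]; *u = p[1]; *y1 = p[2]; *v = p[3];  // Y0 U Y1 V
    }

    static void UnpackUYVY(const uint8_t* p,
                           uint8_t* y0, uint8_t* y1, uint8_t* u, uint8_t* v) {
      *u = p[0]; *y0 = p[1]; *v = p[2]; *y1 = p[3];  // U Y0 V Y1
    }
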
} if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 0, 8, 16); } } -void I422ToRGBARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGBARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 24, 16, 8); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 24, 16, 8); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 0, 24, 16, 8); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 24, 16, 8); } } -void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) { +void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { for (int x = 0; x < width; ++x) { - YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0); - y_buf += 1; + YuvPixel(src_y[0], 128, 128, rgb_buf, 24, 16, 8, 0); + src_y += 1; rgb_buf += 4; // Advance 1 pixel. } } @@ -903,7 +988,7 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) { } } -void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { src_uv += (width - 1) << 1; for (int x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; @@ -1399,35 +1484,35 @@ void I422ToUYVYRow_C(const uint8* src_y, // row_win.cc has asm version, but GCC uses 2 step wrapper. 5% slower. // TODO(fbarchard): Handle width > kMaxStride here instead of calling code. 
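
The "2 step wrapper" mentioned in the comment above is the pattern used in this block when no direct assembly exists for a destination format: convert one row to ARGB into a SIMD_ALIGNED temporary of kMaxStride bytes, then repack that row. The same idea is presumably why this patch shrinks the chroma temporaries in YUY2ToARGBRow_SSSE3 and UYVYToARGBRow_SSSE3 to kMaxStride / 2: a 422 row carries only half-width U and V. A rough sketch of the shape, using row functions this patch already calls (the buffer size and alignment attribute are stand-ins, and the prototypes are paraphrased with stdint types rather than libyuv's uint8 typedef):

    #include <stdint.h>

    // Paraphrased prototypes; the real declarations live in include/libyuv/row.h.
    void I422ToARGBRow_SSSE3(const uint8_t* src_y, const uint8_t* src_u,
                             const uint8_t* src_v, uint8_t* dst_argb, int width);
    void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb565,
                              int width);

    enum { kMaxStrideSketch = 2048 * 4 };  // assumed row bound; libyuv uses kMaxStride

    void I422ToRGB565Row_TwoStep_sketch(const uint8_t* src_y,
                                        const uint8_t* src_u,
                                        const uint8_t* src_v,
                                        uint8_t* dst_rgb565, int width) {
      // Step 1: YUV row to a full ARGB row in an aligned scratch buffer.
      uint8_t row[kMaxStrideSketch] __attribute__((aligned(16)));
      I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
      // Step 2: narrow the ARGB row to the destination format.
      ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
    }
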
#if defined(__x86_64__) || defined(__i386__) -void I422ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); ARGBToRGB565Row_SSE2(row, rgb_buf, width); } #endif // defined(__x86_64__) || defined(__i386__) #if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) -void I422ToARGB1555Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB1555Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); ARGBToARGB1555Row_SSE2(row, rgb_buf, width); } -void I422ToARGB4444Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB4444Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); ARGBToARGB4444Row_SSE2(row, rgb_buf, width); } @@ -1452,45 +1537,45 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y, void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { - SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - YUY2ToUV422Row_SSE2(src_yuy2, rowu, rowv, width); - YUY2ToYRow_SSE2(src_yuy2, rowy, width); - I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width); + YUY2ToYRow_SSE2(src_yuy2, row_y, width); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); } void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { - SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, rowu, rowv, width); - YUY2ToYRow_Unaligned_SSE2(src_yuy2, rowy, width); - I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width); + YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width); + I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); } void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { - SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - UYVYToUV422Row_SSE2(src_uyvy, rowu, rowv, width); - UYVYToYRow_SSE2(src_uyvy, rowy, width); - I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width); + UYVYToYRow_SSE2(src_uyvy, row_y, width); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); } void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { - 
SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - UYVYToUV422Row_Unaligned_SSE2(src_uyvy, rowu, rowv, width); - UYVYToYRow_Unaligned_SSE2(src_uyvy, rowy, width); - I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width); + UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width); + I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); } #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) diff --git a/source/row_mips.cc b/source/row_mips.cc index df4542fbf..48759e09d 100644 --- a/source/row_mips.cc +++ b/source/row_mips.cc @@ -225,8 +225,8 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { } #endif // HAS_MIRRORROW_MIPS_DSPR2 -#ifdef HAS_MIRRORROWUV_MIPS_DSPR2 -void MirrorRowUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +#ifdef HAS_MirrorUVRow_MIPS_DSPR2 +void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { int x = 0; int y = 0; @@ -315,7 +315,7 @@ void MirrorRowUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, "t5", "t7", "t8", "t9" ); } -#endif // HAS_MIRRORROWUV_MIPS_DSPR2 +#endif // HAS_MirrorUVRow_MIPS_DSPR2 diff --git a/source/row_neon.cc b/source/row_neon.cc index 0014e5df6..2c6643f18 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -629,9 +629,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y, #ifdef HAS_NV12TORGB565ROW_NEON void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { + const uint8* src_uv, + uint8* dst_rgb565, + int width) { asm volatile ( "vld1.u8 {d24}, [%4] \n" "vld1.u8 {d25}, [%5] \n" @@ -660,9 +660,9 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, #ifdef HAS_NV21TORGB565ROW_NEON void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { + const uint8* src_uv, + uint8* dst_rgb565, + int width) { asm volatile ( "vld1.u8 {d24}, [%4] \n" "vld1.u8 {d25}, [%5] \n" @@ -955,8 +955,8 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { } #endif // HAS_MIRRORROW_NEON -#ifdef HAS_MIRRORROWUV_NEON -void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { +#ifdef HAS_MirrorUVRow_NEON +void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { asm volatile ( // compute where to start writing destination "add %1, %3 \n" // dst_a + width @@ -1013,7 +1013,7 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { : "memory", "cc", "r12", "q0" ); } -#endif // HAS_MIRRORROWUV_NEON +#endif // HAS_MirrorUVRow_NEON #ifdef HAS_BGRATOARGBROW_NEON void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix) { @@ -1112,6 +1112,41 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { } #endif // HAS_RAWTOARGBROW_NEON +#ifdef HAS_RGB565TOARGBROW_NEON + +#define RGB565TOARGB \ + "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ + "vshrn.u16 d5, q0, #5 \n" /* G xxGGGGGG */ \ + "vshrn.u16 d6, q0, #8 \n" /* R RRRRRxxx */ \ + "vshl.u8 d0, d4, #3 \n" /* B BBBBB000 upper 5 */ \ + "vshl.u8 d1, d5, #2 \n" /* G GGGGGG00 upper 6 */ \ + "vbic.u8 d2, d6, d7 \n" /* R RRRRR000 upper 5 */ \ + "vshr.u8 d4, d0, #5 \n" /* B 00000BBB lower 3 */ \ + "vshr.u8 d5, d1, #6 \n" /* G 000000GG lower 2 */ \ + "vshr.u8 d6, d2, 
#5 \n" /* R 00000RRR lower 3 */ \ + "vorr.u8 q0, q0, q2 \n" /* B,G */ \ + "vorr.u8 d2, d2, d6 \n" /* R */ + +void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { + asm volatile ( + "vmov.u8 d3, #255 \n" // Alpha + "vmov.u8 d7, #7 \n" // 5 bit mask + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565. + "subs %2, %2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_argb), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List + ); +} +#endif // HAS_RGB565TOARGBROW_NEON + #ifdef HAS_ARGBTORGBAROW_NEON void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) { asm volatile ( @@ -1436,9 +1471,9 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, ARGBTOARGB4444 "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. "bgt 1b \n" - : "+r"(src_argb), // %0 + : "+r"(src_argb), // %0 "+r"(dst_argb4444), // %1 - "+r"(pix) // %2 + "+r"(pix) // %2 : : "memory", "cc", "q0", "q8", "q9", "q10", "q11" ); @@ -1447,6 +1482,117 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, #ifdef HAS_ARGBTOYROW_NEON void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q12", "q13" + ); +} +#endif // HAS_ARGBTOYROW_NEON + +#ifdef HAS_RGB565TOYROW_NEON +void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d7, #7 \n" // 5 bit mask + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565. + "subs %2, %2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q12", "q13" + ); +} +#endif // HAS_RGB565TOYROW_NEON + + +#ifdef HAS_BGRATOYROW_NEON +void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. + "subs %2, %2, #8 \n" // 8 processed per loop. 
+ "vmull.u8 q8, d1, d4 \n" // R + "vmlal.u8 q8, d2, d5 \n" // G + "vmlal.u8 q8, d3, d6 \n" // B + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_bgra), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_BGRATOYROW_NEON + +#ifdef HAS_ABGRTOYROW_NEON +void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // R + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // B + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_ABGRTOYROW_NEON + +#ifdef HAS_RGBATOYROW_NEON +void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { asm volatile ( "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1454,7 +1600,34 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { "vmov.u8 d7, #16 \n" // Add 16 constant ".p2align 2 \n" "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d1, d4 \n" // B + "vmlal.u8 q8, d2, d5 \n" // G + "vmlal.u8 q8, d3, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_RGBATOYROW_NEON + +#ifdef HAS_RGB24TOYROW_NEON +void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d0, d4 \n" // B "vmlal.u8 q8, d1, d5 \n" // G @@ -1463,14 +1636,41 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" - : "+r"(src_argb), // %0 + : "+r"(src_rgb24), // %0 "+r"(dst_y), // %1 "+r"(pix) // %2 : : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" ); } -#endif // HAS_ARGBTOYROW_NEON +#endif // HAS_RGB24TOYROW_NEON + +#ifdef HAS_RAWTOYROW_NEON +void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. 
+ "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // B + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_RAWTOYROW_NEON #endif // __ARM_NEON__ diff --git a/source/row_posix.cc b/source/row_posix.cc index 5e26005b7..62afc05a8 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -925,6 +925,120 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, ); } +void ARGBToUV422Row_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { + asm volatile ( + "movdqa %0,%%xmm4 \n" + "movdqa %1,%%xmm3 \n" + "movdqa %2,%%xmm5 \n" + : + : "m"(kARGBToU), // %0 + "m"(kARGBToV), // %1 + "m"(kAddUV128) // %2 + ); + asm volatile ( + "sub %1,%2 \n" + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "movdqa 0x20(%0),%%xmm2 \n" + "movdqa 0x30(%0),%%xmm6 \n" + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "sub $0x10,%3 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" +#endif + ); +} + +void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { + asm volatile ( + "movdqa %0,%%xmm4 \n" + "movdqa %1,%%xmm3 \n" + "movdqa %2,%%xmm5 \n" + : + : "m"(kARGBToU), // %0 + "m"(kARGBToV), // %1 + "m"(kAddUV128) // %2 + ); + asm volatile ( + "sub %1,%2 \n" + ".p2align 4 \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "sub $0x10,%3 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" +#endif + ); +} + void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { asm volatile ( "movdqa 
%4,%%xmm5 \n" @@ -1652,7 +1766,7 @@ struct { void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1688,7 +1802,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgb24_buf, + uint8* dst_rgb24, int width) { // fpic 32 bit gcc 4.2 on OSX runs out of GPR regs. #ifdef __APPLE__ @@ -1743,7 +1857,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* raw_buf, + uint8* dst_raw, int width) { // fpic 32 bit gcc 4.2 on OSX runs out of GPR regs. #ifdef __APPLE__ @@ -1798,7 +1912,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1834,7 +1948,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1869,7 +1983,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -1901,8 +2015,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, } void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, + const uint8* src_vu, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -1936,7 +2050,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1972,7 +2086,7 @@ void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2008,7 +2122,7 @@ void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2043,7 +2157,7 @@ void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2075,8 +2189,8 @@ void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, + const uint8* src_vu, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2110,7 +2224,7 @@ void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2147,7 +2261,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, void 
OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2183,7 +2297,7 @@ void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2220,7 +2334,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2257,7 +2371,7 @@ void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2293,7 +2407,7 @@ void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2446,7 +2560,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { CONST uvec8 kShuffleMirrorUV = { 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u }; -void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { intptr_t temp_width = static_cast(width); asm volatile ( diff --git a/source/row_win.cc b/source/row_win.cc index c1b77cfc7..87da31699 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -1101,6 +1101,124 @@ __asm { } } +__declspec(naked) __declspec(align(16)) +void ARGBToUV422Row_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { +__asm { + push edi + mov eax, [esp + 4 + 4] // src_argb + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + movdqa xmm7, kARGBToU + movdqa xmm6, kARGBToV + movdqa xmm5, kAddUV128 + sub edi, edx // stride from u to v + + align 16 + convertloop: + /* step 1 - subsample 16x2 argb pixels to 8x1 */ + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + movdqa xmm2, [eax + 32] + movdqa xmm3, [eax + 48] + lea eax, [eax + 64] + movdqa xmm4, xmm0 + shufps xmm0, xmm1, 0x88 + shufps xmm4, xmm1, 0xdd + pavgb xmm0, xmm4 + movdqa xmm4, xmm2 + shufps xmm2, xmm3, 0x88 + shufps xmm4, xmm3, 0xdd + pavgb xmm2, xmm4 + + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V + movdqa xmm1, xmm0 + movdqa xmm3, xmm2 + pmaddubsw xmm0, xmm7 // U + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm6 // V + pmaddubsw xmm3, xmm6 + phaddw xmm0, xmm2 + phaddw xmm1, xmm3 + psraw xmm0, 8 + psraw xmm1, 8 + packsswb xmm0, xmm1 + paddb xmm0, xmm5 // -> unsigned + + // step 3 - store 8 U and 8 V values + sub ecx, 16 + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V + lea edx, [edx + 8] + jg convertloop + + pop edi + ret + } +} + +__declspec(naked) __declspec(align(16)) +void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { +__asm { + push edi + mov eax, [esp + 4 + 4] // src_argb + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + 
movdqa xmm7, kARGBToU + movdqa xmm6, kARGBToV + movdqa xmm5, kAddUV128 + sub edi, edx // stride from u to v + + align 16 + convertloop: + /* step 1 - subsample 16x2 argb pixels to 8x1 */ + movdqu xmm0, [eax] + movdqu xmm1, [eax + 16] + movdqu xmm2, [eax + 32] + movdqu xmm3, [eax + 48] + lea eax, [eax + 64] + movdqa xmm4, xmm0 + shufps xmm0, xmm1, 0x88 + shufps xmm4, xmm1, 0xdd + pavgb xmm0, xmm4 + movdqa xmm4, xmm2 + shufps xmm2, xmm3, 0x88 + shufps xmm4, xmm3, 0xdd + pavgb xmm2, xmm4 + + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V + movdqa xmm1, xmm0 + movdqa xmm3, xmm2 + pmaddubsw xmm0, xmm7 // U + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm6 // V + pmaddubsw xmm3, xmm6 + phaddw xmm0, xmm2 + phaddw xmm1, xmm3 + psraw xmm0, 8 + psraw xmm1, 8 + packsswb xmm0, xmm1 + paddb xmm0, xmm5 // -> unsigned + + // step 3 - store 8 U and 8 V values + sub ecx, 16 + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V + lea edx, [edx + 8] + jg convertloop + + pop edi + ret + } +} + __declspec(naked) __declspec(align(16)) void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { @@ -1656,7 +1774,7 @@ __declspec(naked) __declspec(align(16)) void I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1699,7 +1817,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgb24_buf, + uint8* dst_rgb24, int width) { __asm { push esi @@ -1746,7 +1864,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* raw_buf, + uint8* dst_raw, int width) { __asm { push esi @@ -1866,7 +1984,7 @@ __declspec(naked) __declspec(align(16)) void I422ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1910,7 +2028,7 @@ __declspec(naked) __declspec(align(16)) void I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1952,7 +2070,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1990,7 +2108,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV21ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2029,7 +2147,7 @@ __declspec(naked) __declspec(align(16)) void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2072,7 +2190,7 @@ __declspec(naked) __declspec(align(16)) void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2116,7 +2234,7 @@ __declspec(naked) __declspec(align(16)) void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2158,7 +2276,7 @@ void 
I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2196,7 +2314,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2233,7 +2351,7 @@ __declspec(naked) __declspec(align(16)) void I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { __asm { push esi @@ -2274,7 +2392,7 @@ __declspec(naked) __declspec(align(16)) void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { __asm { push esi @@ -2315,7 +2433,7 @@ __declspec(naked) __declspec(align(16)) void I422ToABGRRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { __asm { push esi @@ -2356,7 +2474,7 @@ __declspec(naked) __declspec(align(16)) void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { __asm { push esi @@ -2397,7 +2515,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { __asm { push esi @@ -2438,7 +2556,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { __asm { push esi @@ -2591,7 +2709,7 @@ static const uvec8 kShuffleMirrorUV = { }; __declspec(naked) __declspec(align(16)) -void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { __asm { push edi diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index f18beb675..d8a04c71e 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -220,7 +220,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ + benchmark_width_ - 4, _Any, +, 0) \ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, _Unaligned, +, 1) \ @@ -614,10 +614,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2) TESTATOPLANAR(RGB565, 2, I420, 2, 2) TESTATOPLANAR(ARGB1555, 2, I420, 2, 2) TESTATOPLANAR(ARGB4444, 2, I420, 2, 2) -// TESTATOPLANAR(ARGB, 4, I411, 4, 1) +TESTATOPLANAR(ARGB, 4, I411, 4, 1) TESTATOPLANAR(ARGB, 4, I422, 2, 1) -// TESTATOPLANAR(ARGB, 4, I444, 1, 1) -// TODO(fbarchard): Implement and test 411 and 444 +TESTATOPLANAR(ARGB, 4, I444, 1, 1) TESTATOPLANAR(V210, 16 / 6, I420, 2, 2) TESTATOPLANAR(YUY2, 2, I420, 2, 2) TESTATOPLANAR(UYVY, 2, I420, 2, 2) @@ -629,30 +628,103 @@ TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2) TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2) TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2) -#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, \ - N, NEG, OFF) \ +#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ +TEST_F(libyuvTest, 
FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \ + align_buffer_16(src_argb, kStride * kHeight + OFF); \ + align_buffer_16(dst_y_c, kWidth * kHeight); \ + align_buffer_16(dst_uv_c, kWidth / SUBSAMP_X * 2 * kHeight / SUBSAMP_Y); \ + align_buffer_16(dst_y_opt, kWidth * kHeight); \ + align_buffer_16(dst_uv_opt, kWidth / SUBSAMP_X * 2 * kHeight / SUBSAMP_Y); \ + srandom(time(NULL)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \ + MaskCpuFlags(0); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_c, kWidth, \ + dst_uv_c, kWidth / SUBSAMP_X * 2, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(-1); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_opt, kWidth, \ + dst_uv_opt, kWidth / SUBSAMP_X * 2, \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = \ + abs(static_cast(dst_y_c[i * kWidth + j]) - \ + static_cast(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 2); \ + for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \ + for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \ + int abs_diff = \ + abs(static_cast(dst_uv_c[i * kWidth / SUBSAMP_X * 2 + j]) - \ + static_cast(dst_uv_opt[i * kWidth / SUBSAMP_X * 2 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 2); \ + free_aligned_buffer_16(dst_y_c) \ + free_aligned_buffer_16(dst_uv_c) \ + free_aligned_buffer_16(dst_y_opt) \ + free_aligned_buffer_16(dst_uv_opt) \ + free_aligned_buffer_16(src_argb) \ +} + +#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2) +TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2) + +#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ + FMT_B, BPP_B, STRIDE_B, \ + W1280, DIFF, N, NEG, OFF) \ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ - align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight + OFF); \ - align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \ - align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_16(src_argb, kStrideA * kHeight + OFF); \ + align_buffer_16(dst_argb_c, kStrideB * kHeight); \ + align_buffer_16(dst_argb_opt, kStrideB * kHeight); \ srandom(time(NULL)); \ - for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \ + for (int i = 0; i < kStrideA * kHeight; ++i) { \ src_argb[i + OFF] = (random() & 0xff); \ } \ MaskCpuFlags(0); \ - FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \ - dst_argb_c, kWidth * BPP_B, \ + FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ + dst_argb_c, kStrideB, \ kWidth, NEG kHeight); \ 
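
One detail worth noting in the reworked test macros: buffer sizes are now derived from a rounded stride, kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A, rather than kWidth * BPP_A, so macropixel formats such as YUY2 get a whole number of 4-byte Y0-U-Y1-V units even at odd widths. A quick worked check of that rounding (the widths below are examples, not values taken from the tests):

    #include <assert.h>

    // Round kWidth pixels at bpp bytes each up to a multiple of stride bytes,
    // as TESTATOBI and TESTATOBRANDOM above do.
    static int RoundedStride(int width, int bpp, int stride) {
      return (width * bpp + stride - 1) / stride * stride;
    }

    int main(void) {
      assert(RoundedStride(63, 2, 4) == 128);  // YUY2: 126 bytes rounds up to 128
      assert(RoundedStride(63, 4, 4) == 252);  // ARGB: already a multiple of 4
      return 0;
    }
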
MaskCpuFlags(-1); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \ - dst_argb_opt, kWidth * BPP_B, \ + FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ + dst_argb_opt, kStrideB, \ kWidth, NEG kHeight); \ } \ int max_diff = 0; \ - for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \ + for (int i = 0; i < kStrideB * kHeight; ++i) { \ int abs_diff = \ abs(static_cast(dst_argb_c[i]) - \ static_cast(dst_argb_opt[i])); \ @@ -665,65 +737,26 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \ free_aligned_buffer_16(dst_argb_c) \ free_aligned_buffer_16(dst_argb_opt) \ } -#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, DIFF) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Any, +, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Unaligned, +, 1) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Invert, -, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Opt, +, 0) -TESTATOB(ARGB, 4, 4, ARGB, 4, 0) -TESTATOB(ARGB, 4, 4, BGRA, 4, 0) -TESTATOB(ARGB, 4, 4, ABGR, 4, 0) -TESTATOB(ARGB, 4, 4, RGBA, 4, 0) -TESTATOB(ARGB, 4, 4, RAW, 3, 0) -TESTATOB(ARGB, 4, 4, RGB24, 3, 0) -TESTATOB(ARGB, 4, 4, RGB565, 2, 0) -TESTATOB(ARGB, 4, 4, ARGB1555, 2, 0) -TESTATOB(ARGB, 4, 4, ARGB4444, 2, 0) -TESTATOB(ARGB, 4, 4, BayerBGGR, 1, 0) -TESTATOB(ARGB, 4, 4, BayerRGGB, 1, 0) -TESTATOB(ARGB, 4, 4, BayerGBRG, 1, 0) -TESTATOB(ARGB, 4, 4, BayerGRBG, 1, 0) -TESTATOB(ARGB, 4, 4, I400, 1, 2) -TESTATOB(BGRA, 4, 4, ARGB, 4, 0) -TESTATOB(ABGR, 4, 4, ARGB, 4, 0) -TESTATOB(RGBA, 4, 4, ARGB, 4, 0) -TESTATOB(RAW, 3, 3, ARGB, 4, 0) -TESTATOB(RGB24, 3, 3, ARGB, 4, 0) -TESTATOB(RGB565, 2, 2, ARGB, 4, 0) -TESTATOB(ARGB1555, 2, 2, ARGB, 4, 0) -TESTATOB(ARGB4444, 2, 2, ARGB, 4, 0) -TESTATOB(YUY2, 2, 2, ARGB, 4, 0) -TESTATOB(UYVY, 2, 2, ARGB, 4, 0) -TESTATOB(M420, 3 / 2, 1, ARGB, 4, 0) -TESTATOB(BayerBGGR, 1, 1, ARGB, 4, 0) -TESTATOB(BayerRGGB, 1, 1, ARGB, 4, 0) -TESTATOB(BayerGBRG, 1, 1, ARGB, 4, 0) -TESTATOB(BayerGRBG, 1, 1, ARGB, 4, 0) -TESTATOB(I400, 1, 1, ARGB, 4, 0) -TESTATOB(I400, 1, 1, I400, 1, 0) -TESTATOB(I400, 1, 1, I400Mirror, 1, 0) -TESTATOB(Y, 1, 1, ARGB, 4, 0) -TESTATOB(ARGB, 4, 4, ARGBMirror, 4, 0) - -#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF) \ +#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \ srandom(time(NULL)); \ for (int times = 0; times < benchmark_iterations_; ++times) { \ const int kWidth = (random() & 63) + 1; \ const int kHeight = (random() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\ const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\ - align_buffer_page_end(src_argb, kStrideA * kHeight); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \ - for (int i = 0; i < kStrideA * kHeight; ++i) { \ + align_buffer_page_end(src_argb, kStrideA * kHeightA); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ src_argb[i] = (random() & 0xff); \ } \ + memset(dst_argb_c, 0, kStrideB * kHeightB); \ + 
-#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF) \
+#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                       FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
   srandom(time(NULL)); \
   for (int times = 0; times < benchmark_iterations_; ++times) { \
     const int kWidth = (random() & 63) + 1; \
     const int kHeight = (random() & 31) + 1; \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
     const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\
     const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\
-    align_buffer_page_end(src_argb, kStrideA * kHeight); \
-    align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \
-    align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \
-    for (int i = 0; i < kStrideA * kHeight; ++i) { \
+    align_buffer_page_end(src_argb, kStrideA * kHeightA); \
+    align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
+    align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
+    for (int i = 0; i < kStrideA * kHeightA; ++i) { \
      src_argb[i] = (random() & 0xff); \
    } \
+    memset(dst_argb_c, 0, kStrideB * kHeightB); \
+    memset(dst_argb_opt, 0, kStrideB * kHeightB); \
     MaskCpuFlags(0); \
     FMT_A##To##FMT_B(src_argb, kStrideA, \
                      dst_argb_c, kStrideB, \
@@ -733,7 +766,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
                      dst_argb_opt, kStrideB, \
                      kWidth, kHeight); \
     int max_diff = 0; \
-    for (int i = 0; i < kStrideB * kHeight; ++i) { \
+    for (int i = 0; i < kStrideB * kHeightB; ++i) { \
       int abs_diff = \
           abs(static_cast<int>(dst_argb_c[i]) - \
              static_cast<int>(dst_argb_opt[i])); \
@@ -748,33 +781,58 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
   } \
 }
-TESTATOBRANDOM(ARGB, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, BGRA, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ABGR, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RGBA, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RAW, 3, 3, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RGB24, 3, 3, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RGB565, 2, 2, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ARGB1555, 2, 2, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ARGB4444, 2, 2, 0)
-TESTATOBRANDOM(ARGB, 4, 4, I400, 1, 1, 2)
-// TODO(fbarchard): Implement YUY2
-// TESTATOBRANDOM(ARGB, 4, 4, YUY2, 4, 2, 0)
-// TESTATOBRANDOM(ARGB, 4, 4, UYVY, 4, 2, 0)
-TESTATOBRANDOM(BGRA, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ABGR, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RGBA, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RAW, 3, 3, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RGB24, 3, 3, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(I400, 1, 1, ARGB, 4, 4, 0)
-TESTATOBRANDOM(YUY2, 4, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(UYVY, 4, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(I400, 1, 1, I400, 1, 1, 0)
-TESTATOBRANDOM(I400, 1, 1, I400Mirror, 1, 1, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ARGBMirror, 4, 4, 0)
+#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                 FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Any, +, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Opt, +, 0) \
+    TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                   FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF)
+
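The NEG argument in the macros controls the sign of the height handed to the converter: the _Invert variants pass a negative height, which libyuv interprets as a request to flip the image vertically. A minimal illustrative call through a hypothetical helper (assuming even dimensions; not part of the patch):

#include "libyuv/convert_from_argb.h"

// Convert ARGB to I420 upside-down, the way the _Invert tests exercise the
// converters. The caller is assumed to have allocated the destination planes.
void ConvertFlipped(const uint8* argb, int width, int height,
                    uint8* y, uint8* u, uint8* v) {
  libyuv::ARGBToI420(argb, width * 4,
                     y, width,
                     u, width / 2,
                     v, width / 2,
                     width, -height);  // Negative height = vertical flip.
}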
+TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
+TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
+TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
 
 TEST_F(libyuvTest, Test565) {
   SIMD_ALIGNED(uint8 orig_pixels[256][4]);