From 2d11d43a6e21865b904705acce6535ae4c2d3caf Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Thu, 16 Feb 2012 02:50:39 +0000 Subject: [PATCH] shuffle functions so convert.h is all formats to I420 and convert_from.h is from I420 to all formats BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/395006 git-svn-id: http://libyuv.googlecode.com/svn/trunk@174 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv.h | 1 + include/libyuv/convert.h | 220 +++--- include/libyuv/convert_from.h | 134 ++++ include/libyuv/planar_functions.h | 142 +--- include/libyuv/version.h | 2 +- libyuv.gyp | 3 +- source/convert.cc | 958 ++++++++++++---------- source/convert_from.cc | 1227 +++++++++++++++++++++++++++++ source/convertfrom.cc | 239 ------ source/planar_functions.cc | 1177 +-------------------------- source/rotate.cc | 1 + source/row.h | 6 + source/row_common.cc | 15 +- source/row_neon.cc | 26 + source/row_posix.cc | 45 +- source/row_win.cc | 45 +- 17 files changed, 2162 insertions(+), 2081 deletions(-) create mode 100644 include/libyuv/convert_from.h create mode 100644 source/convert_from.cc delete mode 100644 source/convertfrom.cc diff --git a/README.chromium b/README.chromium index 3a8b2c3a9..aa900ddd4 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 173 +Version: 174 License: BSD License File: LICENSE diff --git a/include/libyuv.h b/include/libyuv.h index f6f5c65aa..bc161db8f 100644 --- a/include/libyuv.h +++ b/include/libyuv.h @@ -15,6 +15,7 @@ #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/convert.h" +#include "libyuv/convert_from.h" #include "libyuv/cpu_id.h" #include "libyuv/format_conversion.h" #include "libyuv/planar_functions.h" diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index f645d68d9..0e164a5e7 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -12,6 +12,7 @@ 
#define INCLUDE_LIBYUV_CONVERT_H_ #include "libyuv/basic_types.h" +#include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #ifdef __cplusplus @@ -19,117 +20,142 @@ namespace libyuv { extern "C" { #endif -// RGB24 is also known as 24BG and BGR3 -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +// Copy I420 to I420. +int I420Copy(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); -// RAW is also known as RGB3 -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I420ToYUY2(const uint8* src_y, int src_stride_y, +// Convert I422 to I420. 
+int I422ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -int I420ToV210(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - - -int RGB24ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -int RAWToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -int RGB565ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -int 
ABGRToI420(const uint8* src_frame, int src_stride_frame, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); +// Convert I444 to I420. +int I444ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert I400 (grey) to I420. +int I400ToI420(const uint8* src_y, int src_stride_y, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert NV12 to I420. Also used for NV21. +int NV12ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_uv, int src_stride_uv, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert M420 to I420. +int M420ToI420(const uint8* src_m420, int src_stride_m420, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert Q420 to I420. +int Q420ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_yuy2, int src_stride_yuy2, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert YUY2 to I420. +int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert UYVY to I420. +int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert V210 to I420. 
+int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// ARGB little endian (bgra in memory) to I420 +int ARGBToI420(const uint8* src_frame, int src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// BGRA little endian (argb in memory) to I420 int BGRAToI420(const uint8* src_frame, int src_stride_frame, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -int ARGBToI420(const uint8* src_frame, int src_stride_frame, +// ABGR little endian (rgba in memory) to I420 +int ABGRToI420(const uint8* src_frame, int src_stride_frame, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); +// RGB little endian (bgr in memory) to I420 +int RGB24ToI420(const uint8* src_frame, int src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// RGB big endian (rgb in memory) to I420 +int RAWToI420(const uint8* src_frame, int src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// RGB16 (RGBP fourcc) little endian to I420 +int RGB565ToI420(const uint8* src_frame, int src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// RGB15 (RGBO fourcc) little endian to I420 +int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// RGB12 (R444 fourcc) little endian to I420 +int ARGB4444ToI420(const uint8* src_frame, int 
src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Note Bayer formats (BGGR) To I420 are in format_conversion.h + // Convert camera sample to I420 with cropping, rotation and vertical flip. // "src_size" is needed to parse MJPG. // "dst_stride_y" number of bytes in a row of the dst_y plane. @@ -162,16 +188,6 @@ int ConvertToI420(const uint8* src_frame, size_t src_size, RotationMode rotation, uint32 format); -// Convert I420 to specified format. -// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the -// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. -int ConvertFromI420(const uint8* y, int y_stride, - const uint8* u, int u_stride, - const uint8* v, int v_stride, - uint8* dst_sample, int dst_sample_stride, - int width, int height, - uint32 format); - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/convert_from.h b/include/libyuv/convert_from.h new file mode 100644 index 000000000..26047eea6 --- /dev/null +++ b/include/libyuv/convert_from.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ +#define INCLUDE_LIBYUV_CONVERT_FROM_H_ + +#include "libyuv/basic_types.h" +#include "libyuv/rotate.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// See also convert.h for conversions from other formats to I420. + +// For I420 to I420, use I420Copy, which is declared in convert.h. + +int I420ToI422(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +int I420ToI444(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Copy to I400. Source can be I420,422,444,400,NV12,NV21 +int I400Copy(const uint8* src_y, int src_stride_y, + uint8* dst_y, int dst_stride_y, + int width, int height); + +// TODO(fbarchard): I420ToNV12 +// TODO(fbarchard): I420ToM420 +// TODO(fbarchard): I420ToQ420 + +int I420ToYUY2(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToUYVY(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToV210(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToARGB(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +int I420ToBGRA(const uint8* src_y, int src_stride_y, + const uint8* 
src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +int I420ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +int I420ToRGB24(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToRAW(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToRGB565(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToARGB1555(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I420ToARGB4444(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +// Note Bayer formats (BGGR) To I420 are in format_conversion.h + +// Convert I420 to specified format. +// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the +// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. 
+int ConvertFromI420(const uint8* y, int y_stride, + const uint8* u, int u_stride, + const uint8* v, int v_stride, + uint8* dst_sample, int dst_sample_stride, + int width, int height, + uint32 format); + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 168fcbcc7..e1380fa27 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -18,14 +18,14 @@ namespace libyuv { extern "C" { #endif -// Copy I420 to I420. -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +void SetPlane(uint8* dst_y, int dst_stride_y, + int width, int height, + uint32 value); + +// Copy a plane of data (I420 to I400) +void CopyPlane(const uint8* src_y, int src_stride_y, + uint8* dst_y, int dst_stride_y, + int width, int height); // I420 mirror int I420Mirror(const uint8* src_y, int src_stride_y, @@ -36,49 +36,6 @@ int I420Mirror(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert I422 to I420. -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I422 to I420. -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I444 to I420. 
-int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I420 to I444. -int I420ToI444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I400 (grey) to I420. -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Convert NV12 to ARGB. Also used for NV21. int NV12ToARGB(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, @@ -91,76 +48,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, uint8* dst_frame, int dst_stride_frame, int width, int height); -// Copy to I400. Source can be I420,422,444,400,NV12,NV21 -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Convert NV12 to I420. Also used for NV21. -int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert Q420 to I420. -int Q420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert M420 to I420. -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert YUY2 to I420. 
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert UYVY to I420. -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert V210 to I420. -int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I420 to ARGB. -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I420 to BGRA. -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I420 to ABGR. -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - // Convert I422 to ARGB. 
int I422ToARGB(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, @@ -242,9 +129,16 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Copy a plane of data -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, +int I422ToYUY2(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +int I422ToUYVY(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, int width, int height); #ifdef __cplusplus diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cd1b2719e..e0f4ec43b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 173 +#define LIBYUV_VERSION 174 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/libyuv.gyp b/libyuv.gyp index 8a7e545a9..f237d5a4c 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -25,6 +25,7 @@ # includes 'include/libyuv/basic_types.h', 'include/libyuv/convert.h', + 'include/libyuv/convert_from.h', 'include/libyuv/scale.h', 'include/libyuv/planar_functions.h', 'include/libyuv/video_common.h', @@ -37,7 +38,7 @@ # sources 'source/compare.cc', 'source/convert.cc', - 'source/convertfrom.cc', + 'source/convert_from.cc', 'source/cpu_id.cc', 'source/format_conversion.cc', 'source/planar_functions.cc', diff --git a/source/convert.cc b/source/convert.cc index 846b4877e..d7638b324 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -10,6 +10,8 @@ #include "libyuv/convert.h" +#include <string.h> // For memcpy() + #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" #include "libyuv/format_conversion.h" @@ -23,477 +25,519 @@ namespace libyuv { extern "C" { 
#endif -// YUY2 - Macro-pixel = 2 image pixels -// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... +// Copy I420 with optional flipping +int I420Copy(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_y || !src_u || !src_v || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + int halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } -// UYVY - Macro-pixel = 2 image pixels -// U0Y0V0Y1 + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); + CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); + return 0; +} #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) -#define HAS_I42XTOYUY2ROW_SSE2 +#define HAS_HALFROW_SSE2 __declspec(naked) -static void I42xToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { +static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { __asm { - push esi push edi - mov eax, [esp + 8 + 4] // src_y - mov esi, [esp + 8 + 8] // src_u - mov edx, [esp + 8 + 12] // src_v - mov edi, [esp + 8 + 16] // dst_frame - mov ecx, [esp + 8 + 20] // width - sub edx, esi + mov eax, [esp + 4 + 4] // src_uv + mov edx, [esp + 4 + 8] // src_uv_stride + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + sub edi, eax 
convertloop: - movq xmm2, qword ptr [esi] // U - movq xmm3, qword ptr [esi + edx] // V - lea esi, [esi + 8] - punpcklbw xmm2, xmm3 // UV - movdqa xmm0, [eax] // Y - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm2 // YUYV - punpckhbw xmm1, xmm2 - movdqa [edi], xmm0 - movdqa [edi + 16], xmm1 - lea edi, [edi + 32] + movdqa xmm0, [eax] + pavgb xmm0, [eax + edx] + movdqa [eax + edi], xmm0 + lea eax, [eax + 16] sub ecx, 16 ja convertloop - pop edi - pop esi ret } } -#define HAS_I42XTOUYVYROW_SSE2 -__declspec(naked) -static void I42xToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_y - mov esi, [esp + 8 + 8] // src_u - mov edx, [esp + 8 + 12] // src_v - mov edi, [esp + 8 + 16] // dst_frame - mov ecx, [esp + 8 + 20] // width - sub edx, esi - - convertloop: - movq xmm2, qword ptr [esi] // U - movq xmm3, qword ptr [esi + edx] // V - lea esi, [esi + 8] - punpcklbw xmm2, xmm3 // UV - movdqa xmm0, [eax] // Y - movdqa xmm1, xmm2 - lea eax, [eax + 16] - punpcklbw xmm1, xmm0 // UYVY - punpckhbw xmm2, xmm0 - movdqa [edi], xmm1 - movdqa [edi + 16], xmm2 - lea edi, [edi + 32] - sub ecx, 16 - ja convertloop - - pop edi - pop esi - ret - } -} #elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) -#define HAS_I42XTOYUY2ROW_SSE2 -static void I42xToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { +#define HAS_HALFROW_SSE2 +static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { asm volatile ( - "sub %1,%2 \n" - "1: \n" - "movq (%1),%%xmm2 \n" - "movq (%1,%2,1),%%xmm3 \n" - "lea 0x8(%1),%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqa (%0),%%xmm0 \n" - "lea 0x10(%0),%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,(%3) \n" - "movdqa %%xmm1,0x10(%3) \n" - "lea 
0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "ja 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc" + "sub %0,%1 \n" +"1: \n" + "movdqa (%0),%%xmm0 \n" + "pavgb (%0,%3),%%xmm0 \n" + "movdqa %%xmm0,(%0,%1) \n" + "lea 0x10(%0),%0 \n" + "sub $0x10,%2 \n" + "ja 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_uv), // %1 + "+r"(pix) // %2 + : "r"(static_cast<intptr_t>(src_uv_stride)) // %3 + : "memory", "cc" #if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3" + , "xmm0" #endif - ); +); +} +#endif + +static void HalfRow_C(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { + for (int x = 0; x < pix; ++x) { + dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; + } } -#define HAS_I42XTOUYVYROW_SSE2 -static void I42xToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - asm volatile ( - "sub %1,%2 \n" - "1: \n" - "movq (%1),%%xmm2 \n" - "movq (%1,%2,1),%%xmm3 \n" - "lea 0x8(%1),%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqa (%0),%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "lea 0x10(%0),%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,(%3) \n" - "movdqa %%xmm2,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "ja 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc" +int I422ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + int halfwidth = (width + 1) >> 1; + void (*HalfRow)(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); +#if defined(HAS_HALFROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && + IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + HalfRow = HalfRow_SSE2; + } else +#endif + { + HalfRow = HalfRow_C; + } + + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + // SubSample U plane. + int y; + for (y = 0; y < height - 1; y += 2) { + HalfRow(src_u, src_stride_u, dst_u, halfwidth); + src_u += src_stride_u * 2; + dst_u += dst_stride_u; + } + if (height & 1) { + HalfRow(src_u, 0, dst_u, halfwidth); + } + + // SubSample V plane. 
+ for (y = 0; y < height - 1; y += 2) { + HalfRow(src_v, src_stride_v, dst_v, halfwidth); + src_v += src_stride_v * 2; + dst_v += dst_stride_v; + } + if (height & 1) { + HalfRow(src_v, 0, dst_v, halfwidth); + } + return 0; +} + +// Blends 32x2 pixels to 16x1 +// source in scale.cc +#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) +#define HAS_SCALEROWDOWN2_NEON +void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, + uint8* dst, int dst_width); +#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ + !defined(YUV_DISABLE_ASM) +void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width); +#endif +void ScaleRowDown2Int_C(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width); + +int I444ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + int halfwidth = (width + 1) >> 1; + void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, + uint8* dst_ptr, int dst_width); +#if defined(HAS_SCALEROWDOWN2_NEON) + if (TestCpuFlag(kCpuHasNEON) && + IS_ALIGNED(halfwidth, 16)) { + ScaleRowDown2 = ScaleRowDown2Int_NEON; + } else +#endif +#if defined(HAS_SCALEROWDOWN2_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && + IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + ScaleRowDown2 = ScaleRowDown2Int_SSE2; +#endif + { + ScaleRowDown2 = ScaleRowDown2Int_C; + } + + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + // SubSample U plane. + int y; + for (y = 0; y < height - 1; y += 2) { + ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth); + src_u += src_stride_u * 2; + dst_u += dst_stride_u; + } + if (height & 1) { + ScaleRowDown2(src_u, 0, dst_u, halfwidth); + } + + // SubSample V plane. + for (y = 0; y < height - 1; y += 2) { + ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth); + src_v += src_stride_v * 2; + dst_v += dst_stride_v; + } + if (height & 1) { + ScaleRowDown2(src_v, 0, dst_v, halfwidth); + } + return 0; +} + +// I400 is greyscale typically used in MJPG +int I400ToI420(const uint8* src_y, int src_stride_y, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_stride_y = -src_stride_y; + } + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128); + SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128); + return 0; +} + +static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, + uint8* dst, int dst_stride_frame, + int width, int height) { + // Copy plane + for (int y = 0; y < height; y += 2) { + memcpy(dst, src, width); + src += src_stride_0; + dst += dst_stride_frame; + memcpy(dst, src, width); + src += src_stride_1; + dst += dst_stride_frame; + } +} + +// Support converting from FOURCC_M420 +// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for +// easy conversion to I420. +// M420 format description: +// M420 is row biplanar 420: 2 rows of Y and 1 row of VU. +// Chroma is half width / half height. (420) +// src_stride_m420 is row planar. Normally this will be the width in pixels. +// The UV plane is half width, but 2 values, so src_stride_m420 applies to +// this as well as the two Y planes. +static int X420ToI420(const uint8* src_y, + int src_stride_y0, int src_stride_y1, + const uint8* src_uv, int src_stride_uv, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + int halfheight = (height + 1) >> 1; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_u = dst_u + (halfheight - 1) * dst_stride_u; + dst_v = dst_v + (halfheight - 1) * dst_stride_v; + dst_stride_y = -dst_stride_y; + dst_stride_u = -dst_stride_u; + dst_stride_v = -dst_stride_v; + } + + int halfwidth = (width + 1) >> 1; + void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); +#if defined(HAS_SPLITUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { + SplitUV = SplitUV_NEON; + } else +#elif defined(HAS_SPLITUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + SplitUV = SplitUV_SSE2; + } else +#endif + { + SplitUV = SplitUV_C; + } + + if (dst_y) { + CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, + width, height); + } + + int halfheight = (height + 1) >> 1; + for (int y = 0; y < halfheight; ++y) { + // Copy a row of UV. + SplitUV(src_uv, dst_u, dst_v, halfwidth); + dst_u += dst_stride_u; + dst_v += dst_stride_v; + src_uv += src_stride_uv; + } + return 0; +} + +// Convert NV12 to I420. +int NV12ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_uv, int src_stride_uv, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + return X420ToI420(src_y, src_stride_y, src_stride_y, + src_uv, src_stride_uv, + dst_y, dst_stride_y, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + width, height); +} + +// Convert M420 to I420. 
+int M420ToI420(const uint8* src_m420, int src_stride_m420, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, + src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, + dst_y, dst_stride_y, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + width, height); +} + +#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) +#define HAS_SPLITYUY2_SSE2 +__declspec(naked) +static void SplitYUY2_SSE2(const uint8* src_yuy2, + uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) { + __asm { + push esi + push edi + mov eax, [esp + 8 + 4] // src_yuy2 + mov edx, [esp + 8 + 8] // dst_y + mov esi, [esp + 8 + 12] // dst_u + mov edi, [esp + 8 + 16] // dst_v + mov ecx, [esp + 8 + 20] // pix + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + psrlw xmm5, 8 + + convertloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + lea eax, [eax + 32] + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + pand xmm2, xmm5 // even bytes are Y + pand xmm3, xmm5 + packuswb xmm2, xmm3 + movdqa [edx], xmm2 + lea edx, [edx + 16] + psrlw xmm0, 8 // YUYV -> UVUV + psrlw xmm1, 8 + packuswb xmm0, xmm1 + movdqa xmm1, xmm0 + pand xmm0, xmm5 // U + packuswb xmm0, xmm0 + movq qword ptr [esi], xmm0 + lea esi, [esi + 8] + psrlw xmm1, 8 // V + packuswb xmm1, xmm1 + movq qword ptr [edi], xmm1 + lea edi, [edi + 8] + sub ecx, 16 + ja convertloop + + pop edi + pop esi + ret + } +} + +#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#define HAS_SPLITYUY2_SSE2 +static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y, + uint8* dst_u, uint8* dst_v, int pix) { + asm volatile ( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" +"1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "pand %%xmm5,%%xmm2 \n" + "pand %%xmm5,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqa 
%%xmm2,(%1) \n" + "lea 0x10(%1),%1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%2) \n" + "lea 0x8(%2),%2 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm1,(%3) \n" + "lea 0x8(%3),%3 \n" + "sub $0x10,%4 \n" + "ja 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_y), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(pix) // %4 + : + : "memory", "cc" #if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3" + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif - ); +); } #endif -void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, - uint8* dst_frame, int width) { - for (int x = 0; x < width - 1; x += 2) { - dst_frame[0] = src_y[0]; - dst_frame[1] = src_u[0]; - dst_frame[2] = src_y[1]; - dst_frame[3] = src_v[0]; - dst_frame += 4; - src_y += 2; - src_u += 1; - src_v += 1; - } - if (width & 1) { - dst_frame[0] = src_y[0]; - dst_frame[1] = src_u[0]; - dst_frame[2] = src_y[0]; // duplicate last y - dst_frame[3] = src_v[0]; - } -} - -void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, - uint8* dst_frame, int width) { - for (int x = 0; x < width - 1; x += 2) { - dst_frame[0] = src_u[0]; - dst_frame[1] = src_y[0]; - dst_frame[2] = src_v[0]; - dst_frame[3] = src_y[1]; - dst_frame += 4; - src_y += 2; - src_u += 1; - src_v += 1; - } - if (width & 1) { - dst_frame[0] = src_u[0]; - dst_frame[1] = src_y[0]; - dst_frame[2] = src_v[0]; - dst_frame[3] = src_y[0]; // duplicate last y - } -} - - -// gcc provided macros -#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define LIBYUV_LITTLE_ENDIAN -#endif -// Visual C for x86 defines these -#elif defined(_M_X64) || defined(_M_IX86) -#define LIBYUV_LITTLE_ENDIAN -#endif - -#ifdef LIBYUV_LITTLE_ENDIAN -#define READWORD(p) (*((uint32*) (p))) -#define WRITEWORD(p, 
v) (*((uint32*) (p))) = v -#else -uint32 READWORD(const uint8* p) { - return (uint32) p[0] | - ((uint32) (p[1]) << 8) | - ((uint32) (p[2]) << 16) | - ((uint32) (p[3]) << 24); -} -void WRITEWORD(uint8* p, uint32 v) { - p[0] = (uint8)(v & 255); - p[1] = (uint8)((v >> 8) & 255); - p[2] = (uint8)((v >> 16) & 255); - p[3] = (uint8)((v >> 24) & 255); -} -#endif - -// Must be multiple of 6 pixels. Will over convert to handle remainder. -// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210 -void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) { - for (int x = 0; x < width; x += 6) { - uint32 w = READWORD(src_v210 + 0); - dst_uyvy[0] = (w >> 2) & 0xff; - dst_uyvy[1] = (w >> 12) & 0xff; - dst_uyvy[2] = (w >> 22) & 0xff; - - w = READWORD(src_v210 + 4); - dst_uyvy[3] = (w >> 2) & 0xff; - dst_uyvy[4] = (w >> 12) & 0xff; - dst_uyvy[5] = (w >> 22) & 0xff; - - w = READWORD(src_v210 + 8); - dst_uyvy[6] = (w >> 2) & 0xff; - dst_uyvy[7] = (w >> 12) & 0xff; - dst_uyvy[8] = (w >> 22) & 0xff; - - w = READWORD(src_v210 + 12); - dst_uyvy[9] = (w >> 2) & 0xff; - dst_uyvy[10] = (w >> 12) & 0xff; - dst_uyvy[11] = (w >> 22) & 0xff; - - src_v210 += 16; - dst_uyvy += 12; +static void SplitYUY2_C(const uint8* src_yuy2, + uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) { + // Copy a row of YUY2. 
+ for (int x = 0; x < pix; x += 2) { + dst_y[0] = src_yuy2[0]; + dst_y[1] = src_yuy2[2]; + dst_u[0] = src_yuy2[1]; + dst_v[0] = src_yuy2[3]; + src_yuy2 += 4; + dst_y += 2; + dst_u += 1; + dst_v += 1; } } -#define EIGHTTOTEN(x) (x << 2 | x >> 6) -void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) { - for (int x = 0; x < width; x += 6) { - WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) | - (EIGHTTOTEN(src_uyvy[1]) << 10) | - (EIGHTTOTEN(src_uyvy[2]) << 20)); - WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) | - (EIGHTTOTEN(src_uyvy[4]) << 10) | - (EIGHTTOTEN(src_uyvy[5]) << 20)); - WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) | - (EIGHTTOTEN(src_uyvy[7]) << 10) | - (EIGHTTOTEN(src_uyvy[8]) << 20)); - WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) | - (EIGHTTOTEN(src_uyvy[10]) << 10) | - (EIGHTTOTEN(src_uyvy[11]) << 20)); - src_uyvy += 12; - dst_v210 += 16; - } -} - -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, +// Convert Q420 to I420. +// Format is rows of YY/YUYV +int Q420ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_yuy2, int src_stride_yuy2, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, int width, int height) { - if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { - return -1; - } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + int halfheight = (height + 1) >> 1; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_u = dst_u + (halfheight - 1) * dst_stride_u; + dst_v = dst_v + (halfheight - 1) * dst_stride_v; + dst_stride_y = -dst_stride_y; + dst_stride_u = -dst_stride_u; + dst_stride_v = -dst_stride_v; } - void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToYUY2Row = I42xToYUY2Row_C; -#if defined(HAS_I42XTOYUY2ROW_SSE2) + void (*SplitYUY2)(const uint8* src_yuy2, + uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix); +#if defined(HAS_SPLITYUY2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { - I42xToYUY2Row = I42xToYUY2Row_SSE2; - } + IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + SplitYUY2 = SplitYUY2_SSE2; + } else #endif - - for (int y = 0; y < height; ++y) { - I42xToYUY2Row(src_y, src_u, src_y, dst_frame, width); + { + SplitYUY2 = SplitYUY2_C; + } + for (int y = 0; y < height; y += 2) { + memcpy(dst_y, src_y, width); + dst_y += dst_stride_y; src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - dst_frame += dst_stride_frame; - } - return 0; -} -int I420ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height) { - if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; - } - void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToYUY2Row = I42xToYUY2Row_C; -#if defined(HAS_I42XTOYUY2ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { - I42xToYUY2Row = I42xToYUY2Row_SSE2; - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width); - I42xToYUY2Row(src_y + src_stride_y, src_u, src_v, - dst_frame + dst_stride_frame, width); - src_y += src_stride_y * 2; - src_u += src_stride_u; - src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; - } - if (height & 1) { - I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width); - } - return 0; -} - -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height) { - if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; - } - void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToUYVYRow = I42xToUYVYRow_C; -#if defined(HAS_I42XTOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { - I42xToUYVYRow = I42xToUYVYRow_SSE2; - } -#endif - - for (int y = 0; y < height; ++y) { - I42xToUYVYRow(src_y, src_u, src_y, dst_frame, width); - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - dst_frame += dst_stride_frame; - } - return 0; -} - -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height) { - if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; - } - void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToUYVYRow = I42xToUYVYRow_C; -#if defined(HAS_I42XTOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { - I42xToUYVYRow = I42xToUYVYRow_SSE2; - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width); - I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, - dst_frame + dst_stride_frame, width); - src_y += src_stride_y * 2; - src_u += src_stride_u; - src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; - } - if (height & 1) { - I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width); - } - return 0; -} - -int I420ToV210(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height) { - if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required - src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; - } - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix); - UYVYToV210Row = UYVYToV210Row_C; - - void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToUYVYRow = I42xToUYVYRow_C; -#if defined(HAS_I42XTOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) { - I42xToUYVYRow = I42xToUYVYRow_SSE2; - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - I42xToUYVYRow(src_y, src_u, src_v, row, width); - UYVYToV210Row(row, dst_frame, width); - I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width); - UYVYToV210Row(row, dst_frame + dst_stride_frame, width); - - src_y += src_stride_y * 2; - src_u += src_stride_u; - src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; - } - if (height & 1) { - I42xToUYVYRow(src_y, src_u, src_v, row, width); - UYVYToV210Row(row, dst_frame, width); + // Copy a row of YUY2. 
+ SplitYUY2(src_yuy2, dst_y, dst_u, dst_v, width); + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + src_yuy2 += src_stride_yuy2; } return 0; } @@ -647,6 +691,56 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, return 0; } +// gcc provided macros +#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define LIBYUV_LITTLE_ENDIAN +#endif +// Visual C for x86 defines these +#elif defined(_M_X64) || defined(_M_IX86) +#define LIBYUV_LITTLE_ENDIAN +#endif + +#ifdef LIBYUV_LITTLE_ENDIAN +#define READWORD(p) (*((uint32*) (p))) +#else +static inline uint32 READWORD(const uint8* p) { + return (uint32) p[0] | + ((uint32) (p[1]) << 8) | + ((uint32) (p[2]) << 16) | + ((uint32) (p[3]) << 24); +} +#endif + +// Must be multiple of 6 pixels. Will over convert to handle remainder. +// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210 +static void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) { + for (int x = 0; x < width; x += 6) { + uint32 w = READWORD(src_v210 + 0); + dst_uyvy[0] = (w >> 2) & 0xff; + dst_uyvy[1] = (w >> 12) & 0xff; + dst_uyvy[2] = (w >> 22) & 0xff; + + w = READWORD(src_v210 + 4); + dst_uyvy[3] = (w >> 2) & 0xff; + dst_uyvy[4] = (w >> 12) & 0xff; + dst_uyvy[5] = (w >> 22) & 0xff; + + w = READWORD(src_v210 + 8); + dst_uyvy[6] = (w >> 2) & 0xff; + dst_uyvy[7] = (w >> 12) & 0xff; + dst_uyvy[8] = (w >> 22) & 0xff; + + w = READWORD(src_v210 + 12); + dst_uyvy[9] = (w >> 2) & 0xff; + dst_uyvy[10] = (w >> 12) & 0xff; + dst_uyvy[11] = (w >> 22) & 0xff; + + src_v210 += 16; + dst_uyvy += 12; + } +} + // Convert V210 to I420. // V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels. // With is multiple of 48. diff --git a/source/convert_from.cc b/source/convert_from.cc new file mode 100644 index 000000000..d11b6aa21 --- /dev/null +++ b/source/convert_from.cc @@ -0,0 +1,1227 @@ +/* + * Copyright (c) 2011 The LibYuv project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/convert_from.h" + +#include // For memcpy() + +#include "libyuv/basic_types.h" +#include "libyuv/convert.h" // For I420Copy +#include "libyuv/cpu_id.h" +#include "libyuv/format_conversion.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "libyuv/video_common.h" +#include "row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +int I420ToI422(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; + dst_stride_y = -dst_stride_y; + dst_stride_u = -dst_stride_u; + dst_stride_v = -dst_stride_v; + } + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + int halfwidth = (width + 1) >> 1; + // UpSample U plane. + int y; + for (y = 0; y < height - 1; y += 2) { + memcpy(dst_u, src_u, halfwidth); + memcpy(dst_u + dst_stride_u, src_u, halfwidth); + src_u += src_stride_u; + dst_u += dst_stride_u * 2; + } + if (height & 1) { + memcpy(dst_u, src_u, halfwidth); + } + + // UpSample V plane. 
+ for (y = 0; y < height - 1; y += 2) { + memcpy(dst_v, src_v, halfwidth); + memcpy(dst_v + dst_stride_v, src_v, halfwidth); + src_v += src_stride_v; + dst_v += dst_stride_v * 2; + } + if (height & 1) { + memcpy(dst_v, src_v, halfwidth); + } + return 0; +} + +// use Bilinear for upsampling chroma +void ScalePlaneBilinear(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_ptr, uint8* dst_ptr); + +int I420ToI444(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; + dst_stride_y = -dst_stride_y; + dst_stride_u = -dst_stride_u; + dst_stride_v = -dst_stride_v; + } + + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + + // Upsample U plane. + ScalePlaneBilinear(halfwidth, halfheight, + width, height, + src_stride_u, + dst_stride_u, + src_u, dst_u); + + // Upsample V plane. + ScalePlaneBilinear(halfwidth, halfheight, + width, height, + src_stride_v, + dst_stride_v, + src_v, dst_v); + return 0; +} + +// Copy to I400. Source can be I420,422,444,400,NV12,NV21 +int I400Copy(const uint8* src_y, int src_stride_y, + uint8* dst_y, int dst_stride_y, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_stride_y = -src_stride_y; + } + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + return 0; +} + +// YUY2 - Macro-pixel = 2 image pixels +// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... + +// UYVY - Macro-pixel = 2 image pixels +// U0Y0V0Y1 + +#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) +#define HAS_I42XTOYUY2ROW_SSE2 +__declspec(naked) +static void I42xToYUY2Row_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_frame, int width) { + __asm { + push esi + push edi + mov eax, [esp + 8 + 4] // src_y + mov esi, [esp + 8 + 8] // src_u + mov edx, [esp + 8 + 12] // src_v + mov edi, [esp + 8 + 16] // dst_frame + mov ecx, [esp + 8 + 20] // width + sub edx, esi + + convertloop: + movq xmm2, qword ptr [esi] // U + movq xmm3, qword ptr [esi + edx] // V + lea esi, [esi + 8] + punpcklbw xmm2, xmm3 // UV + movdqa xmm0, [eax] // Y + lea eax, [eax + 16] + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm2 // YUYV + punpckhbw xmm1, xmm2 + movdqa [edi], xmm0 + movdqa [edi + 16], xmm1 + lea edi, [edi + 32] + sub ecx, 16 + ja convertloop + + pop edi + pop esi + ret + } +} + +#define HAS_I42XTOUYVYROW_SSE2 +__declspec(naked) +static void I42xToUYVYRow_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_frame, int width) { + __asm { + push esi + push edi + mov eax, [esp + 8 + 4] // src_y + mov esi, [esp + 8 + 8] // src_u + mov edx, [esp + 8 + 12] // src_v + mov edi, [esp + 8 + 16] // dst_frame + mov ecx, [esp + 8 + 20] // width + sub edx, esi + + convertloop: + movq xmm2, qword ptr [esi] // U + movq xmm3, qword ptr [esi + edx] // V + lea esi, [esi + 8] + punpcklbw xmm2, xmm3 // UV + movdqa xmm0, [eax] // Y + movdqa xmm1, xmm2 + lea eax, [eax + 16] + punpcklbw xmm1, xmm0 // UYVY + punpckhbw xmm2, xmm0 + movdqa [edi], xmm1 + movdqa [edi + 16], xmm2 + lea edi, [edi + 32] + sub ecx, 16 + ja convertloop + + pop edi + pop esi + ret + 
} +} +#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#define HAS_I42XTOYUY2ROW_SSE2 +static void I42xToYUY2Row_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_frame, int width) { + asm volatile ( + "sub %1,%2 \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq (%1,%2,1),%%xmm3 \n" + "lea 0x8(%1),%1 \n" + "punpcklbw %%xmm3,%%xmm2 \n" + "movdqa (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqa %%xmm0,(%3) \n" + "movdqa %%xmm1,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "ja 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_frame), // %3 + "+rm"(width) // %4 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3" +#endif + ); +} + +#define HAS_I42XTOUYVYROW_SSE2 +static void I42xToUYVYRow_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_frame, int width) { + asm volatile ( + "sub %1,%2 \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq (%1,%2,1),%%xmm3 \n" + "lea 0x8(%1),%1 \n" + "punpcklbw %%xmm3,%%xmm2 \n" + "movdqa (%0),%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,(%3) \n" + "movdqa %%xmm2,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "ja 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_frame), // %3 + "+rm"(width) // %4 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3" +#endif + ); +} +#endif + +static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, + uint8* dst_frame, int width) { + for (int x = 0; x < width - 1; x += 2) { + dst_frame[0] = src_y[0]; + dst_frame[1] = src_u[0]; + dst_frame[2] = src_y[1]; + dst_frame[3] = src_v[0]; + dst_frame += 4; + src_y += 2; + src_u += 1; + src_v += 1; + } + if (width & 1) { + 
dst_frame[0] = src_y[0]; + dst_frame[1] = src_u[0]; + dst_frame[2] = src_y[0]; // duplicate last y + dst_frame[3] = src_v[0]; + } +} + +static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, + uint8* dst_frame, int width) { + for (int x = 0; x < width - 1; x += 2) { + dst_frame[0] = src_u[0]; + dst_frame[1] = src_y[0]; + dst_frame[2] = src_v[0]; + dst_frame[3] = src_y[1]; + dst_frame += 4; + src_y += 2; + src_u += 1; + src_v += 1; + } + if (width & 1) { + dst_frame[0] = src_u[0]; + dst_frame[1] = src_y[0]; + dst_frame[2] = src_v[0]; + dst_frame[3] = src_y[0]; // duplicate last y + } +} + +// gcc provided macros +#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define LIBYUV_LITTLE_ENDIAN +#endif +// Visual C for x86 defines these +#elif defined(_M_X64) || defined(_M_IX86) +#define LIBYUV_LITTLE_ENDIAN +#endif + +#ifdef LIBYUV_LITTLE_ENDIAN +#define WRITEWORD(p, v) (*((uint32*) (p))) = v +#else + +static inline void WRITEWORD(uint8* p, uint32 v) { + p[0] = (uint8)(v & 255); + p[1] = (uint8)((v >> 8) & 255); + p[2] = (uint8)((v >> 16) & 255); + p[3] = (uint8)((v >> 24) & 255); +} +#endif + +#define EIGHTTOTEN(x) (x << 2 | x >> 6) +static void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) { + for (int x = 0; x < width; x += 6) { + WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) | + (EIGHTTOTEN(src_uyvy[1]) << 10) | + (EIGHTTOTEN(src_uyvy[2]) << 20)); + WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) | + (EIGHTTOTEN(src_uyvy[4]) << 10) | + (EIGHTTOTEN(src_uyvy[5]) << 20)); + WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) | + (EIGHTTOTEN(src_uyvy[7]) << 10) | + (EIGHTTOTEN(src_uyvy[8]) << 20)); + WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) | + (EIGHTTOTEN(src_uyvy[10]) << 10) | + (EIGHTTOTEN(src_uyvy[11]) << 20)); + src_uyvy += 12; + dst_v210 += 16; + } +} + +// TODO(fbarchard): Deprecate, move or expand 422 support? 
+int I422ToYUY2(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height) { + if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_frame = dst_frame + (height - 1) * dst_stride_frame; + dst_stride_frame = -dst_stride_frame; + } + void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width); + I42xToYUY2Row = I42xToYUY2Row_C; +#if defined(HAS_I42XTOYUY2ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 16) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + I42xToYUY2Row = I42xToYUY2Row_SSE2; + } +#endif + + for (int y = 0; y < height; ++y) { + I42xToYUY2Row(src_y, src_u, src_y, dst_frame, width); + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + dst_frame += dst_stride_frame; + } + return 0; +} + +int I420ToYUY2(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height) { + if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_frame = dst_frame + (height - 1) * dst_stride_frame; + dst_stride_frame = -dst_stride_frame; + } + void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width); + I42xToYUY2Row = I42xToYUY2Row_C; +#if defined(HAS_I42XTOYUY2ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 16) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + I42xToYUY2Row = I42xToYUY2Row_SSE2; + } +#endif + + for (int y = 0; y < height - 1; y += 2) { + I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width); + I42xToYUY2Row(src_y + src_stride_y, src_u, src_v, + dst_frame + dst_stride_frame, width); + src_y += src_stride_y * 2; + src_u += src_stride_u; + src_v += src_stride_v; + dst_frame += dst_stride_frame * 2; + } + if (height & 1) { + I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width); + } + return 0; +} + +// TODO(fbarchard): Deprecate, move or expand 422 support? +int I422ToUYVY(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height) { + if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_frame = dst_frame + (height - 1) * dst_stride_frame; + dst_stride_frame = -dst_stride_frame; + } + void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width); + I42xToUYVYRow = I42xToUYVYRow_C; +#if defined(HAS_I42XTOUYVYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 16) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + I42xToUYVYRow = I42xToUYVYRow_SSE2; + } +#endif + + for (int y = 0; y < height; ++y) { + I42xToUYVYRow(src_y, src_u, src_y, dst_frame, width); + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + dst_frame += dst_stride_frame; + } + return 0; +} + +int I420ToUYVY(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height) { + if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_frame = dst_frame + (height - 1) * dst_stride_frame; + dst_stride_frame = -dst_stride_frame; + } + void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width); + I42xToUYVYRow = I42xToUYVYRow_C; +#if defined(HAS_I42XTOUYVYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 16) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && + IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + I42xToUYVYRow = I42xToUYVYRow_SSE2; + } +#endif + + for (int y = 0; y < height - 1; y += 2) { + I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width); + I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, + dst_frame + dst_stride_frame, width); + src_y += src_stride_y * 2; + src_u += src_stride_u; + src_v += src_stride_v; + dst_frame += dst_stride_frame * 2; + } + if (height & 1) { + I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width); + } + return 0; +} + +int I420ToV210(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height) { + if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required + src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_frame = dst_frame + (height - 1) * dst_stride_frame; + dst_stride_frame = -dst_stride_frame; + } + + SIMD_ALIGNED(uint8 row[kMaxStride]); + void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix); + UYVYToV210Row = UYVYToV210Row_C; + + void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width); + I42xToUYVYRow = I42xToUYVYRow_C; +#if defined(HAS_I42XTOUYVYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 16) && + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) { + I42xToUYVYRow = I42xToUYVYRow_SSE2; + } +#endif + + for (int y = 0; y < height - 1; y += 2) { + I42xToUYVYRow(src_y, src_u, src_v, row, width); + UYVYToV210Row(row, dst_frame, width); + I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width); + UYVYToV210Row(row, dst_frame + dst_stride_frame, width); + + src_y += src_stride_y * 2; + src_u += src_stride_u; + src_v += src_stride_v; + dst_frame += dst_stride_frame * 2; + } + if (height & 1) { + I42xToUYVYRow(src_y, src_u, src_v, row, width); + UYVYToV210Row(row, dst_frame, width); + } + return 0; +} + +// Convert I420 to ARGB. +int I420ToARGB(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*I420ToARGBRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToARGBRow = I420ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I420ToARGBRow = I420ToARGBRow_NEON; + } + } else +#elif defined(HAS_I420TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToARGBRow = I420ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + I420ToARGBRow = I420ToARGBRow_SSSE3; + } + } else +#endif + { + I420ToARGBRow = I420ToARGBRow_C; + } + for (int y = 0; y < height; ++y) { + I420ToARGBRow(src_y, src_u, src_v, dst_argb, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to BGRA. +int I420ToBGRA(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_bgra, int dst_stride_bgra, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; + dst_stride_bgra = -dst_stride_bgra; + } + void (*I420ToBGRARow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOBGRAROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToBGRARow = I420ToBGRARow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I420ToBGRARow = I420ToBGRARow_NEON; + } + } else +#elif defined(HAS_I420TOBGRAROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToBGRARow = I420ToBGRARow_Any_SSSE3; + if (IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { + I420ToBGRARow = I420ToBGRARow_SSSE3; + } + } else +#endif + { + I420ToBGRARow = I420ToBGRARow_C; + } + for (int y = 0; y < height; ++y) { + I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width); + dst_bgra += dst_stride_bgra; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to ABGR. +int I420ToABGR(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_abgr, int dst_stride_abgr, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; + dst_stride_abgr = -dst_stride_abgr; + } + void (*I420ToABGRRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOABGRROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToABGRRow = I420ToABGRRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I420ToABGRRow = I420ToABGRRow_NEON; + } + } else +#elif defined(HAS_I420TOABGRROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToABGRRow = I420ToABGRRow_Any_SSSE3; + if (IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { + I420ToABGRRow = I420ToABGRRow_SSSE3; + } + } else +#endif + { + I420ToABGRRow = I420ToABGRRow_C; + } + for (int y = 0; y < height; ++y) { + I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width); + dst_abgr += dst_stride_abgr; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to RGB24. +int I420ToRGB24(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*I420ToARGBRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToARGBRow = I420ToARGBRow_NEON; + } else +#elif defined(HAS_I420TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToARGBRow = I420ToARGBRow_SSSE3; + } else +#endif + { + I420ToARGBRow = I420ToARGBRow_C; + } + + SIMD_ALIGNED(uint8 row[kMaxStride]); + void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix); +#if defined(HAS_ARGBTORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; + } + } else +#endif + { + ARGBToRGB24Row = ARGBToRGB24Row_C; + } + + for (int y = 0; y < height; ++y) { + I420ToARGBRow(src_y, src_u, src_v, row, width); + ARGBToRGB24Row(row, dst_argb, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to RAW. +int I420ToRAW(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*I420ToARGBRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToARGBRow = I420ToARGBRow_NEON; + } else +#elif defined(HAS_I420TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToARGBRow = I420ToARGBRow_SSSE3; + } else +#endif + { + I420ToARGBRow = I420ToARGBRow_C; + } + + SIMD_ALIGNED(uint8 row[kMaxStride]); + void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix); +#if defined(HAS_ARGBTORAWROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; + if (IS_ALIGNED(width, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGBToRAWRow = ARGBToRAWRow_SSSE3; + } + } else +#endif + { + ARGBToRAWRow = ARGBToRAWRow_C; + } + + for (int y = 0; y < height; ++y) { + I420ToARGBRow(src_y, src_u, src_v, row, width); + ARGBToRAWRow(row, dst_argb, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to RGB565. +int I420ToRGB565(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_rgb, int dst_stride_rgb, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; + dst_stride_rgb = -dst_stride_rgb; + } + void (*I420ToARGBRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToARGBRow = I420ToARGBRow_NEON; + } else +#elif defined(HAS_I420TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToARGBRow = I420ToARGBRow_SSSE3; + } else +#endif + { + I420ToARGBRow = I420ToARGBRow_C; + } + + SIMD_ALIGNED(uint8 row[kMaxStride]); + void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix); +#if defined(HAS_ARGBTORGB565ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToRGB565Row = ARGBToRGB565Row_SSE2; + } + } else +#endif + { + ARGBToRGB565Row = ARGBToRGB565Row_C; + } + + for (int y = 0; y < height; ++y) { + I420ToARGBRow(src_y, src_u, src_v, row, width); + ARGBToRGB565Row(row, dst_rgb, width); + dst_rgb += dst_stride_rgb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to ARGB1555. +int I420ToARGB1555(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*I420ToARGBRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToARGBRow = I420ToARGBRow_NEON; + } else +#elif defined(HAS_I420TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToARGBRow = I420ToARGBRow_SSSE3; + } else +#endif + { + I420ToARGBRow = I420ToARGBRow_C; + } + + SIMD_ALIGNED(uint8 row[kMaxStride]); + void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix); +#if defined(HAS_ARGBTOARGB1555ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; + } + } else +#endif + { + ARGBToARGB1555Row = ARGBToARGB1555Row_C; + } + + for (int y = 0; y < height; ++y) { + I420ToARGBRow(src_y, src_u, src_v, row, width); + ARGBToARGB1555Row(row, dst_argb, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to ARGB4444. +int I420ToARGB4444(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*I420ToARGBRow)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_I420TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I420ToARGBRow = I420ToARGBRow_NEON; + } else +#elif defined(HAS_I420TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I420ToARGBRow = I420ToARGBRow_SSSE3; + } else +#endif + { + I420ToARGBRow = I420ToARGBRow_C; + } + + SIMD_ALIGNED(uint8 row[kMaxStride]); + void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix); +#if defined(HAS_ARGBTOARGB4444ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; + } + } else +#endif + { + ARGBToARGB4444Row = ARGBToARGB4444Row_C; + } + + for (int y = 0; y < height; ++y) { + I420ToARGBRow(src_y, src_u, src_v, row, width); + ARGBToARGB4444Row(row, dst_argb, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to specified format +int ConvertFromI420(const uint8* y, int y_stride, + const uint8* u, int u_stride, + const uint8* v, int v_stride, + uint8* dst_sample, int dst_sample_stride, + int width, int height, + uint32 format) { + + if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) { + return -1; + } + switch (format) { + // Single plane formats + case FOURCC_YUY2: + I420ToYUY2(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 2, + width, height); + break; + case FOURCC_UYVY: + I420ToUYVY(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? 
dst_sample_stride : width * 2, + width, height); + break; + case FOURCC_V210: + I420ToV210(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : + (width + 47) / 48 * 128, + width, height); + break; + case FOURCC_RGBP: + I420ToRGB565(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 2, + width, height); + break; + case FOURCC_RGBO: + I420ToARGB1555(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 2, + width, height); + break; + case FOURCC_R444: + I420ToARGB4444(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 2, + width, height); + break; + case FOURCC_24BG: + I420ToRGB24(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 3, + width, height); + break; + case FOURCC_RAW: + I420ToRAW(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 3, + width, height); + break; + case FOURCC_ARGB: + I420ToARGB(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 4, + width, height); + break; + case FOURCC_BGRA: + I420ToBGRA(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 4, + width, height); + break; + case FOURCC_ABGR: + I420ToABGR(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width * 4, + width, height); + break; + case FOURCC_BGGR: + I420ToBayerBGGR(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width, + width, height); + break; + case FOURCC_GBRG: + I420ToBayerGBRG(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? 
dst_sample_stride : width, + width, height); + break; + case FOURCC_GRBG: + I420ToBayerGRBG(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width, + width, height); + break; + case FOURCC_RGGB: + I420ToBayerRGGB(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width, + width, height); + break; + case FOURCC_I400: + I400Copy(y, y_stride, + dst_sample, + dst_sample_stride ? dst_sample_stride : width, + width, height); + break; + // Triplanar formats + // TODO(fbarchard): halfstride instead of halfwidth + case FOURCC_I420: + case FOURCC_YV12: { + int halfwidth = (width + 1) / 2; + int halfheight = (height + 1) / 2; + uint8* dst_u; + uint8* dst_v; + if (format == FOURCC_I420) { + dst_u = dst_sample + width * height; + dst_v = dst_u + halfwidth * halfheight; + } else { + dst_v = dst_sample + width * height; + dst_u = dst_v + halfwidth * halfheight; + } + I420Copy(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, width, + dst_u, halfwidth, + dst_v, halfwidth, + width, height); + break; + } + case FOURCC_I422: + case FOURCC_YV16: { + int halfwidth = (width + 1) / 2; + uint8* dst_u; + uint8* dst_v; + if (format == FOURCC_I422) { + dst_u = dst_sample + width * height; + dst_v = dst_u + halfwidth * height; + } else { + dst_v = dst_sample + width * height; + dst_u = dst_v + halfwidth * height; + } + I420ToI422(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, width, + dst_u, halfwidth, + dst_v, halfwidth, + width, height); + break; + } + case FOURCC_I444: + case FOURCC_YV24: { + uint8* dst_u; + uint8* dst_v; + if (format == FOURCC_I444) { + dst_u = dst_sample + width * height; + dst_v = dst_u + width * height; + } else { + dst_v = dst_sample + width * height; + dst_u = dst_v + width * height; + } + I420ToI444(y, y_stride, + u, u_stride, + v, v_stride, + dst_sample, width, + dst_u, width, + dst_v, width, + width, height); + break; + } + + // Formats not supported 
- MJPG, biplanar, some rgb formats. + default: + return -1; // unknown fourcc - return failure code. + } + return 0; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/source/convertfrom.cc b/source/convertfrom.cc deleted file mode 100644 index 22aaec655..000000000 --- a/source/convertfrom.cc +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/convert.h" - -#include "libyuv/basic_types.h" -#include "libyuv/format_conversion.h" -#include "libyuv/planar_functions.h" -#include "libyuv/video_common.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Convert I420 to specified format -int ConvertFromI420(const uint8* y, int y_stride, - const uint8* u, int u_stride, - const uint8* v, int v_stride, - uint8* dst_sample, int dst_sample_stride, - int width, int height, - uint32 format) { - - if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) { - return -1; - } - switch (format) { - // Single plane formats - case FOURCC_YUY2: - I420ToYUY2(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_UYVY: - I420ToUYVY(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_V210: - I420ToV210(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? 
dst_sample_stride : - (width + 47) / 48 * 128, - width, height); - break; - case FOURCC_RGBP: - I420ToRGB565(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_RGBO: - I420ToARGB1555(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_R444: - I420ToARGB4444(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_24BG: - I420ToRGB24(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 3, - width, height); - break; - case FOURCC_RAW: - I420ToRAW(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 3, - width, height); - break; - case FOURCC_ARGB: - I420ToARGB(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_BGRA: - I420ToBGRA(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_ABGR: - I420ToABGR(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_BGGR: - I420ToBayerBGGR(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_GBRG: - I420ToBayerGBRG(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_GRBG: - I420ToBayerGRBG(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? 
dst_sample_stride : width, - width, height); - break; - case FOURCC_RGGB: - I420ToBayerRGGB(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_I400: - I400Copy(y, y_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - // Triplanar formats - // TODO(fbarchard): halfstride instead of halfwidth - case FOURCC_I420: - case FOURCC_YV12: { - int halfwidth = (width + 1) / 2; - int halfheight = (height + 1) / 2; - uint8* dst_u; - uint8* dst_v; - if (format == FOURCC_I420) { - dst_u = dst_sample + width * height; - dst_v = dst_u + halfwidth * halfheight; - } else { - dst_v = dst_sample + width * height; - dst_u = dst_v + halfwidth * halfheight; - } - I420Copy(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, halfwidth, - dst_v, halfwidth, - width, height); - break; - } - case FOURCC_I422: - case FOURCC_YV16: { - int halfwidth = (width + 1) / 2; - uint8* dst_u; - uint8* dst_v; - if (format == FOURCC_I422) { - dst_u = dst_sample + width * height; - dst_v = dst_u + halfwidth * height; - } else { - dst_v = dst_sample + width * height; - dst_u = dst_v + halfwidth * height; - } - I420ToI422(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, halfwidth, - dst_v, halfwidth, - width, height); - break; - } - case FOURCC_I444: - case FOURCC_YV24: { - uint8* dst_u; - uint8* dst_v; - if (format == FOURCC_I444) { - dst_u = dst_sample + width * height; - dst_v = dst_u + width * height; - } else { - dst_v = dst_sample + width * height; - dst_u = dst_v + width * height; - } - I420ToI444(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, width, - dst_v, width, - width, height); - break; - } - - // Formats not supported - MJPG, biplanar, some rgb formats. - default: - return -1; // unknown fourcc - return failure code. 
- } - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 23a169f94..a48e96eaa 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -20,116 +20,6 @@ namespace libyuv { extern "C" { #endif -#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) -#define HAS_SPLITUV_NEON -// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v -// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. -static void SplitUV_NEON(const uint8* src_uv, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "1: \n" - "vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV - "subs %3, %3, #16 \n" // 16 processed per loop - "vst1.u8 {q0}, [%1]! \n" // store U - "vst1.u8 {q1}, [%2]! \n" // Store V - "bhi 1b \n" - : "+r"(src_uv), - "+r"(dst_u), - "+r"(dst_v), - "+r"(pix) // Output registers - : // Input registers - : "memory", "cc", "q0", "q1" // Clobber List - ); -} - -#elif defined(_M_IX86) && !defined(YUV_DISABLE_ASM) -#define HAS_SPLITUV_SSE2 -__declspec(naked) -static void SplitUV_SSE2(const uint8* src_uv, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - pand xmm0, xmm5 // even bytes - pand xmm1, xmm5 - packuswb xmm0, xmm1 - psrlw xmm2, 8 // odd bytes - psrlw xmm3, 8 - packuswb xmm2, xmm3 - movdqa [edx], xmm0 - movdqa [edx + edi], xmm2 - lea edx, [edx + 16] - sub ecx, 16 - ja convertloop - pop edi - ret - } -} - -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) -#define HAS_SPLITUV_SSE2 -static void SplitUV_SSE2(const uint8* src_uv, - uint8* dst_u, 
uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - -"1: \n" - "movdqa (%0),%%xmm0 \n" - "movdqa 0x10(%0),%%xmm1 \n" - "lea 0x20(%0),%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "movdqa %%xmm0,(%1) \n" - "movdqa %%xmm2,(%1,%2) \n" - "lea 0x10(%1),%1 \n" - "sub $0x10,%3 \n" - "ja 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif -); -} -#endif - -static void SplitUV_C(const uint8* src_uv, - uint8* dst_u, uint8* dst_v, int pix) { - // Copy a row of UV. - for (int x = 0; x < pix; ++x) { - dst_u[0] = src_uv[0]; - dst_v[0] = src_uv[1]; - src_uv += 2; - dst_u += 1; - dst_v += 1; - } -} - // CopyRows copys 'count' bytes using a 16 byte load/store, 64 bytes at time #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) #define HAS_COPYROW_SSE2 @@ -243,41 +133,6 @@ void CopyPlane(const uint8* src_y, int src_stride_y, } } -// Copy I420 with optional flipping -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - int halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); - CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); - return 0; -} - // Mirror a plane of data void MirrorPlane(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, @@ -367,998 +222,6 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, return 0; } -#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) -#define HAS_HALFROW_SSE2 -__declspec(naked) -static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // src_uv_stride - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - sub edi, eax - - convertloop: - movdqa xmm0, [eax] - pavgb xmm0, [eax + edx] - movdqa [eax + edi], xmm0 - lea eax, [eax + 16] - sub ecx, 16 - ja convertloop - pop edi - ret - } -} - -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) -#define HAS_HALFROW_SSE2 -static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - asm volatile ( - "sub %0,%1 \n" -"1: \n" - "movdqa (%0),%%xmm0 \n" - "pavgb (%0,%3),%%xmm0 \n" - "movdqa %%xmm0,(%0,%1) \n" - "lea 0x10(%0),%0 \n" - "sub $0x10,%2 \n" - "ja 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_uv), // %1 - "+r"(pix) // %2 - : "r"(static_cast(src_uv_stride)) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0" -#endif -); -} -#endif - -void HalfRow_C(const 
uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - for (int x = 0; x < pix; ++x) { - dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; - } -} - -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (height - 1) * src_stride_u; - src_v = src_v + (height - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - int halfwidth = (width + 1) >> 1; - void (*HalfRow)(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -#if defined(HAS_HALFROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(halfwidth, 16) && - IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && - IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - HalfRow = HalfRow_SSE2; - } else -#endif - { - HalfRow = HalfRow_C; - } - - // Copy Y plane - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - - // SubSample U plane. - int y; - for (y = 0; y < height - 1; y += 2) { - HalfRow(src_u, src_stride_u, dst_u, halfwidth); - src_u += src_stride_u * 2; - dst_u += dst_stride_u; - } - if (height & 1) { - HalfRow(src_u, 0, dst_u, halfwidth); - } - - // SubSample V plane. 
- for (y = 0; y < height - 1; y += 2) { - HalfRow(src_v, src_stride_v, dst_v, halfwidth); - src_v += src_stride_v * 2; - dst_v += dst_stride_v; - } - if (height & 1) { - HalfRow(src_v, 0, dst_v, halfwidth); - } - return 0; -} - -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (height - 1) * dst_stride_u; - dst_v = dst_v + (height - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } - // Copy Y plane - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - - int halfwidth = (width + 1) >> 1; - // UpSample U plane. - int y; - for (y = 0; y < height - 1; y += 2) { - memcpy(dst_u, src_u, halfwidth); - memcpy(dst_u + dst_stride_u, src_u, halfwidth); - src_u += src_stride_u; - dst_u += dst_stride_u * 2; - } - if (height & 1) { - memcpy(dst_u, src_u, halfwidth); - } - - // UpSample V plane. 
- for (y = 0; y < height - 1; y += 2) { - memcpy(dst_v, src_v, halfwidth); - memcpy(dst_v + dst_stride_v, src_v, halfwidth); - src_v += src_stride_v; - dst_v += dst_stride_v * 2; - } - if (height & 1) { - memcpy(dst_v, src_v, halfwidth); - } - return 0; -} - -// Blends 32x2 pixels to 16x1 -// source in scale.cc -#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) -#define HAS_SCALEROWDOWN2_NEON -void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride, - uint8* dst, int dst_width); -#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ - !defined(YUV_DISABLE_ASM) -void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); -#endif -void ScaleRowDown2Int_C(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); - -int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (height - 1) * src_stride_u; - src_v = src_v + (height - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - int halfwidth = (width + 1) >> 1; - void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); -#if defined(HAS_SCALEROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON) && - IS_ALIGNED(halfwidth, 16)) { - ScaleRowDown2 = ScaleRowDown2Int_NEON; - } else -#endif -#if defined(HAS_SCALEROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(halfwidth, 16) && - IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && - IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - ScaleRowDown2 = ScaleRowDown2Int_SSE2; -#endif - { - ScaleRowDown2 = ScaleRowDown2Int_C; - } - - // Copy Y plane - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - - // SubSample U plane. - int y; - for (y = 0; y < height - 1; y += 2) { - ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth); - src_u += src_stride_u * 2; - dst_u += dst_stride_u; - } - if (height & 1) { - ScaleRowDown2(src_u, 0, dst_u, halfwidth); - } - - // SubSample V plane. 
- for (y = 0; y < height - 1; y += 2) { - ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth); - src_v += src_stride_v * 2; - dst_v += dst_stride_v; - } - if (height & 1) { - ScaleRowDown2(src_v, 0, dst_v, halfwidth); - } - return 0; -} - -// use Bilinear for upsampling chroma -void ScalePlaneBilinear(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr); - -int I420ToI444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (height - 1) * dst_stride_u; - dst_v = dst_v + (height - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } - - // Copy Y plane - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - - // Upsample U plane. - ScalePlaneBilinear(halfwidth, halfheight, - width, height, - src_stride_u, - dst_stride_u, - src_u, dst_u); - - // Upsample V plane. 
- ScalePlaneBilinear(halfwidth, halfheight, - width, height, - src_stride_v, - dst_stride_v, - src_v, dst_v); - return 0; -} - - -static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, - uint8* dst, int dst_stride_frame, - int width, int height) { - // Copy plane - for (int y = 0; y < height; y += 2) { - memcpy(dst, src, width); - src += src_stride_0; - dst += dst_stride_frame; - memcpy(dst, src, width); - src += src_stride_1; - dst += dst_stride_frame; - } -} - -// Support converting from FOURCC_M420 -// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for -// easy conversion to I420. -// M420 format description: -// M420 is row biplanar 420: 2 rows of Y and 1 row of VU. -// Chroma is half width / half height. (420) -// src_stride_m420 is row planar. Normally this will be the width in pixels. -// The UV plane is half width, but 2 values, so src_stride_m420 applies to -// this as well as the two Y planes. -static int X420ToI420(const uint8* src_y, - int src_stride_y0, int src_stride_y1, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - int halfheight = (height + 1) >> 1; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (halfheight - 1) * dst_stride_u; - dst_v = dst_v + (halfheight - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } - - int halfwidth = (width + 1) >> 1; - void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -#if defined(HAS_SPLITUV_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { - SplitUV = SplitUV_NEON; - } else -#elif defined(HAS_SPLITUV_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(halfwidth, 16) && - IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - SplitUV = SplitUV_SSE2; - } else -#endif - { - SplitUV = SplitUV_C; - } - - if (dst_y) { - CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, - width, height); - } - - int halfheight = (height + 1) >> 1; - for (int y = 0; y < halfheight; ++y) { - // Copy a row of UV. - SplitUV(src_uv, dst_u, dst_v, halfwidth); - dst_u += dst_stride_u; - dst_v += dst_stride_v; - src_uv += src_stride_uv; - } - return 0; -} - -// Convert M420 to I420. -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, - src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); -} - -// Convert NV12 to I420. 
-int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, - src_uv, src_stride_uv, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); -} - -#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) -#define HAS_SPLITYUY2_SSE2 -__declspec(naked) -static void SplitYUY2_SSE2(const uint8* src_yuy2, - uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov edx, [esp + 8 + 8] // dst_y - mov esi, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - pand xmm2, xmm5 // even bytes are Y - pand xmm3, xmm5 - packuswb xmm2, xmm3 - movdqa [edx], xmm2 - lea edx, [edx + 16] - psrlw xmm0, 8 // YUYV -> UVUV - psrlw xmm1, 8 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - movq qword ptr [esi], xmm0 - lea esi, [esi + 8] - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edi], xmm1 - lea edi, [edi + 8] - sub ecx, 16 - ja convertloop - - pop edi - pop esi - ret - } -} - -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) -#define HAS_SPLITYUY2_SSE2 -static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" -"1: \n" - "movdqa (%0),%%xmm0 \n" - "movdqa 0x10(%0),%%xmm1 \n" - "lea 0x20(%0),%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm5,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "movdqa %%xmm2,(%1) 
\n" - "lea 0x10(%1),%1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0,(%2) \n" - "lea 0x8(%2),%2 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm1,(%3) \n" - "lea 0x8(%3),%3 \n" - "sub $0x10,%4 \n" - "ja 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif -); -} -#endif - -static void SplitYUY2_C(const uint8* src_yuy2, - uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) { - // Copy a row of YUY2. - for (int x = 0; x < pix; x += 2) { - dst_y[0] = src_yuy2[0]; - dst_y[1] = src_yuy2[2]; - dst_u[0] = src_yuy2[1]; - dst_v[0] = src_yuy2[3]; - src_yuy2 += 4; - dst_y += 2; - dst_u += 1; - dst_v += 1; - } -} - -// Convert Q420 to I420. -// Format is rows of YY/YUYV -int Q420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - int halfheight = (height + 1) >> 1; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (halfheight - 1) * dst_stride_u; - dst_v = dst_v + (halfheight - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } - void (*SplitYUY2)(const uint8* src_yuy2, - uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix); -#if defined(HAS_SPLITYUY2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) && - IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) && - IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) { - SplitYUY2 = SplitYUY2_SSE2; - } else -#endif - { - SplitYUY2 = SplitYUY2_C; - } - for (int y = 0; y < height; y += 2) { - memcpy(dst_y, src_y, width); - dst_y += dst_stride_y; - src_y += src_stride_y; - - // Copy a row of YUY2. - SplitYUY2(src_yuy2, dst_y, dst_u, dst_v, width); - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - src_yuy2 += src_stride_yuy2; - } - return 0; -} - -// Convert I420 to ARGB. -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToARGBRow = I420ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I420ToARGBRow = I420ToARGBRow_NEON; - } - } else -#elif defined(HAS_I420TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToARGBRow = I420ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I420ToARGBRow = I420ToARGBRow_SSSE3; - } - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; - } - for (int y = 0; y < height; ++y) { - I420ToARGBRow(src_y, src_u, src_v, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to BGRA. -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; - dst_stride_bgra = -dst_stride_bgra; - } - void (*I420ToBGRARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOBGRAROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToBGRARow = I420ToBGRARow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I420ToBGRARow = I420ToBGRARow_NEON; - } - } else -#elif defined(HAS_I420TOBGRAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToBGRARow = I420ToBGRARow_Any_SSSE3; - if (IS_ALIGNED(width, 8) && - IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { - I420ToBGRARow = I420ToBGRARow_SSSE3; - } - } else -#endif - { - I420ToBGRARow = I420ToBGRARow_C; - } - for (int y = 0; y < height; ++y) { - I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width); - dst_bgra += dst_stride_bgra; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to ABGR. -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; - dst_stride_abgr = -dst_stride_abgr; - } - void (*I420ToABGRRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOABGRROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToABGRRow = I420ToABGRRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I420ToABGRRow = I420ToABGRRow_NEON; - } - } else -#elif defined(HAS_I420TOABGRROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToABGRRow = I420ToABGRRow_Any_SSSE3; - if (IS_ALIGNED(width, 8) && - IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { - I420ToABGRRow = I420ToABGRRow_SSSE3; - } - } else -#endif - { - I420ToABGRRow = I420ToABGRRow_C; - } - for (int y = 0; y < height; ++y) { - I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width); - dst_abgr += dst_stride_abgr; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RGB24. -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToARGBRow = I420ToARGBRow_NEON; - } else -#elif defined(HAS_I420TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; - } - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix); -#if defined(HAS_ARGBTORGB24ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; - if (IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; - } - } else -#endif - { - ARGBToRGB24Row = ARGBToRGB24Row_C; - } - - for (int y = 0; y < height; ++y) { - I420ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToRGB24Row(row, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RAW. -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToARGBRow = I420ToARGBRow_NEON; - } else -#elif defined(HAS_I420TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; - } - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix); -#if defined(HAS_ARGBTORAWROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; - if (IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBToRAWRow = ARGBToRAWRow_SSSE3; - } - } else -#endif - { - ARGBToRAWRow = ARGBToRAWRow_C; - } - - for (int y = 0; y < height; ++y) { - I420ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToRAWRow(row, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RGB565. -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; - dst_stride_rgb = -dst_stride_rgb; - } - void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToARGBRow = I420ToARGBRow_NEON; - } else -#elif defined(HAS_I420TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; - } - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix); -#if defined(HAS_ARGBTORGB565ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBToRGB565Row = ARGBToRGB565Row_SSE2; - } - } else -#endif - { - ARGBToRGB565Row = ARGBToRGB565Row_C; - } - - for (int y = 0; y < height; ++y) { - I420ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToRGB565Row(row, dst_rgb, width); - dst_rgb += dst_stride_rgb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to ARGB1555. -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToARGBRow = I420ToARGBRow_NEON; - } else -#elif defined(HAS_I420TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; - } - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix); -#if defined(HAS_ARGBTOARGB1555ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; - } - } else -#endif - { - ARGBToARGB1555Row = ARGBToARGB1555Row_C; - } - - for (int y = 0; y < height; ++y) { - I420ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToARGB1555Row(row, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to ARGB4444. -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -#if defined(HAS_I420TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I420ToARGBRow = I420ToARGBRow_NEON; - } else -#elif defined(HAS_I420TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; - } - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix); -#if defined(HAS_ARGBTOARGB4444ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; - } - } else -#endif - { - ARGBToARGB4444Row = ARGBToARGB4444Row_C; - } - - for (int y = 0; y < height; ++y) { - I420ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToARGB4444Row(row, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - // Convert I422 to ARGB. 
int I422ToARGB(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, @@ -1977,9 +840,9 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width, } #endif -static void SetPlane(uint8* dst_y, int dst_stride_y, - int width, int height, - uint32 value) { +void SetPlane(uint8* dst_y, int dst_stride_y, + int width, int height, + uint32 value) { void (*SetRow)(uint8* dst, uint32 value, int pix); #if defined(HAS_SETROW_NEON) if (TestCpuFlag(kCpuHasNEON) && @@ -2068,40 +931,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, return 0; } -// I400 is greyscale typically used in MJPG -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128); - SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128); - return 0; -} - -// Copy to I400. Source can be I420,422,444,400,NV12,NV21 -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; -} - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/rotate.cc b/source/rotate.cc index 8f54ae1e1..e7cc7ded1 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -11,6 +11,7 @@ #include "libyuv/rotate.h" #include "libyuv/cpu_id.h" +#include "libyuv/convert.h" #include "libyuv/planar_functions.h" #include "rotate_priv.h" #include "row.h" diff --git a/source/row.h b/source/row.h index d3f0dac2b..c8670a893 100644 --- a/source/row.h +++ b/source/row.h @@ -46,6 +46,7 @@ extern "C" { #define HAS_I444TOARGBROW_SSSE3 #define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORROW_SSE2 +#define HAS_SPLITUV_SSE2 #define HAS_YUY2TOYROW_SSE2 #define HAS_UYVYTOYROW_SSE2 #define HAS_YUY2TOUVROW_SSE2 @@ -67,6 +68,7 @@ extern "C" { // The following are available on Neon platforms #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #define HAS_MIRRORROW_NEON +#define HAS_SPLITUV_NEON #define HAS_I420TOARGBROW_NEON #define HAS_I420TOBGRAROW_NEON #define HAS_I420TOABGRROW_NEON @@ -125,6 +127,10 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); +void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); +void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); +void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); + void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); diff --git a/source/row_common.cc b/source/row_common.cc index 3d6adf5ac..ec22b37fe 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -350,9 +350,7 @@ 
void I444ToARGBRow_C(const uint8* y_buf, } } -void YToARGBRow_C(const uint8* y_buf, - uint8* rgb_buf, - int width) { +void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) { for (int x = 0; x < width; ++x) { YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0); y_buf += 1; @@ -368,6 +366,17 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) { } } +void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { + // Copy a row of UV. + for (int x = 0; x < pix; ++x) { + dst_u[0] = src_uv[0]; + dst_v[0] = src_uv[1]; + src_uv += 2; + dst_u += 1; + dst_v += 1; + } +} + // Filter 2 rows of YUY2 UV's (422) into U and V (420) void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_u, uint8* dst_v, int pix) { diff --git a/source/row_neon.cc b/source/row_neon.cc index e062e8a3f..68028b192 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -15,6 +15,9 @@ namespace libyuv { extern "C" { #endif +// This module is for GCC Neon +#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) + #define YUVTORGB \ "vld1.u8 {d0}, [%0]! \n" \ "vld1.u32 {d2[0]}, [%1]! \n" \ @@ -160,6 +163,29 @@ YUVTORGB } #endif +#if defined(HAS_SPLITUV_NEON) +// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v +// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. +void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { + asm volatile ( + "1: \n" + "vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV + "subs %3, %3, #16 \n" // 16 processed per loop + "vst1.u8 {q0}, [%1]! \n" // store U + "vst1.u8 {q1}, [%2]! 
\n" // Store V + "bhi 1b \n" + : "+r"(src_uv), + "+r"(dst_u), + "+r"(dst_v), + "+r"(pix) // Output registers + : // Input registers + : "memory", "cc", "q0", "q1" // Clobber List + ); +} +#endif + +#endif // __ARM_NEON__ + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_posix.cc b/source/row_posix.cc index e0148b725..6fb3f3c73 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -17,6 +17,9 @@ namespace libyuv { extern "C" { #endif +// This module is for GCC x86 and x64 +#if (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) + #ifdef __APPLE__ #define CONST #else @@ -816,7 +819,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { "lea -0x10(%0),%0 \n" "1: \n" "movdqu (%0,%2),%%xmm0 \n" - "movdqu %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm1 \n" "psllw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm1,%%xmm0 \n" @@ -839,6 +842,43 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { } #endif +#ifdef HAS_SPLITUV_SSE2 +void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { + asm volatile ( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" + + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "psrlw $0x8,%%xmm2 \n" + "psrlw $0x8,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqa %%xmm0,(%1) \n" + "movdqa %%xmm2,(%1,%2) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%3 \n" + "ja 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(pix) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" +#endif + ); +} +#endif + #ifdef HAS_YUY2TOYROW_SSE2 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { asm volatile ( @@ -1099,9 +1139,10 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int 
stride_uyvy, #endif ); } - #endif // HAS_YUY2TOYROW_SSE2 +#endif // defined(__x86_64__) || defined(__i386__) + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_win.cc b/source/row_win.cc index 76e6f2a1c..7d347c436 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -15,6 +15,9 @@ namespace libyuv { extern "C" { #endif +// This module is for Visual C x86 +#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) + #ifdef HAS_ARGBTOYROW_SSSE3 // Constant multiplication table for converting ARGB to I400. @@ -1503,7 +1506,7 @@ __asm { #ifdef HAS_MIRRORROW_SSE2 -// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3 +// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3 // version can not. __declspec(naked) void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { @@ -1514,7 +1517,7 @@ __asm { lea eax, [eax - 16] convertloop: movdqu xmm0, [eax + ecx] - movdqu xmm1, xmm0 // swap bytes + movdqa xmm1, xmm0 // swap bytes psllw xmm0, 8 psrlw xmm1, 8 por xmm0, xmm1 @@ -1530,6 +1533,42 @@ __asm { } #endif +#ifdef HAS_SPLITUV_SSE2 +__declspec(naked) +void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { + __asm { + push edi + mov eax, [esp + 4 + 4] // src_uv + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + psrlw xmm5, 8 + sub edi, edx + + convertloop: + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + lea eax, [eax + 32] + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + pand xmm0, xmm5 // even bytes + pand xmm1, xmm5 + packuswb xmm0, xmm1 + psrlw xmm2, 8 // odd bytes + psrlw xmm3, 8 + packuswb xmm2, xmm3 + movdqa [edx], xmm0 + movdqa [edx + edi], xmm2 + lea edx, [edx + 16] + sub ecx, 16 + ja convertloop + pop edi + ret + } +} +#endif + #ifdef HAS_YUY2TOYROW_SSE2 __declspec(naked) void YUY2ToYRow_SSE2(const uint8* src_yuy2, @@ -1800,6 +1839,8 @@ void UYVYToUVRow_Unaligned_SSE2(const 
uint8* src_uyvy, int stride_uyvy, } #endif // HAS_YUY2TOYROW_SSE2 +#endif // _M_IX86 + #ifdef __cplusplus } // extern "C" } // namespace libyuv