diff --git a/include/convert.h b/include/convert.h index dd51024a9..731f624cd 100644 --- a/include/convert.h +++ b/include/convert.h @@ -14,128 +14,96 @@ #include "basic_types.h" -namespace libyuv -{ -class Convert { - public: - static int - I420ToRGB24(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); +namespace libyuv { - static int - I420ToARGB(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); +int +I420ToRGB24(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height); - static int - I420ToARGB4444(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); - - static int - I420ToRGB565(const uint8* src_yplane, int src_ystride, +int +I420ToARGB4444(const uint8* src_yplane, int src_ystride, const uint8* src_uplane, int src_ustride, const uint8* src_vplane, int src_vstride, uint8* dst_frame, int dst_stride, int src_width, int src_height); - static int - I420ToARGB1555(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); - - static int - I420ToYUY2(const uint8* src_yplane, int src_ystride, +int +I420ToRGB565(const uint8* src_yplane, int src_ystride, const uint8* src_uplane, int src_ustride, const uint8* src_vplane, int src_vstride, uint8* dst_frame, int dst_stride, int src_width, int src_height); - static int - I420ToUYVY(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); - static int - UYVYToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height); +int +I420ToARGB1555(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height); - static int - RGB24ToARGB(const uint8* src_frame, int src_stride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); +int +I420ToYUY2(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height); - static int - RGB24ToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height); +int +I420ToUYVY(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height); - static int - RAWToI420(const uint8* src_frame, int src_stride, +int +RGB24ToARGB(const uint8* src_frame, int src_stride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height); + +int +RGB24ToI420(const uint8* src_frame, int src_stride, uint8* dst_yplane, int dst_ystride, uint8* dst_uplane, int dst_ustride, uint8* dst_vplane, int dst_vstride, int src_width, int src_height); - static int - ABGRToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height); +int +RAWToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height); - static int - I420ToABGR(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, +int +ABGRToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height); + +int +BGRAToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height); + +int +ARGBToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height); + +int +NV12ToRGB565(const uint8* src_yplane, int src_ystride, + const uint8* src_uvplane, int src_uvstride, uint8* dst_frame, int dst_stride, int src_width, int src_height); - static int - I420ToBGRA(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); - - static int` - BGRAToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height); - - int static - ARGBToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height); - - static int - NV12ToRGB565(const uint8* src_yplane, int src_ystride, - const uint8* src_uvplane, int src_uvstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height); - - DISALLOW_IMPLICIT_CONSTRUCTORS(Convert); - -}; } // namespace libyuv #endif // LIBYUV_INCLUDE_CONVERT_H_ diff --git a/include/format_conversion.h b/include/format_conversion.h index e93532046..e73d4e50a 100644 --- a/include/format_conversion.h +++ b/include/format_conversion.h @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef LIBYUV_INCLUDE_FORMAT_CONVERSION_H_ -#define LIBYUV_INCLUDE_FORMAT_CONVERSION_H_ + +#ifndef LIBYUV_INCLUDE_FORMATCONVERSION_H_ +#define LIBYUV_INCLUDE_FORMATCONVERSION_H_ #include "basic_types.h" @@ -23,14 +24,18 @@ void BayerRGBToI420(const uint8* src_bayer, int src_pitch_bayer, uint8* dst_v, int dst_pitch_v, int width, int height); -// Converts any 32 bit ARGB to any Bayer RGB format. -void RGB32ToBayerRGB(const uint8* src_rgb, int src_pitch_rgb, - uint32 src_fourcc_rgb, - uint8* dst_bayer, int dst_pitch_bayer, - uint32 dst_fourcc_bayer, - int width, int height); +// Converts any Bayer RGB format to ARGB. +void BayerRGBToARGB(const uint8* src_bayer, int src_pitch_bayer, + uint32 src_fourcc_bayer, + uint8* dst_rgb, int dst_pitch_rgb, + int width, int height); +// Converts ARGB to any Bayer RGB format. +void ARGBToBayerRGB(const uint8* src_rgb, int src_pitch_rgb, + uint8* dst_bayer, int dst_pitch_bayer, + uint32 dst_fourcc_bayer, + int width, int height); } // namespace libyuv -#endif // LIBYUV_INCLUDE_FORMAT_CONVERSION_H_ +#endif // LIBYUV_INCLUDE_FORMATCONVERSION_H_ diff --git a/include/general.h b/include/general.h index 253f27eeb..1a5b48380 100644 --- a/include/general.h +++ b/include/general.h @@ -9,50 +9,128 @@ */ -/* - * General operations on YUV images. - */ - -#ifndef LIBYUV_INCLUDE_GENERAL_H_ -#define LIBYUV_INCLUDE_GENERAL_H_ +#ifndef LIBYUV_INCLUDE_PLANAR_FUNCTIONS_H_ +#define LIBYUV_INCLUDE_PLANAR_FUNCTIONS_H_ #include "basic_types.h" namespace libyuv { -// Supported rotation -enum VideoRotationMode -{ - kRotateNone = 0, - kRotateClockwise = 90, - kRotateCounterClockwise = -90, - kRotate180 = 180, -}; +// Copy I420 to I420. +void I420Copy(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); -// I420 mirror -int -I420Mirror(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int width, int height); +// Convert I422 to I420. Used by MJPG. +void I422ToI420(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); -// Crop/Pad I420 frame to match required dimensions. -int -I420CropPad(const uint8* src_frame, int src_width, - int src_height, uint8* dst_frame, - int dst_width, int dst_height); +// Convert M420 to I420. +void M420ToI420(const uint8* src_m420, int src_pitch_m420, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); -// I420 Crop - make a center cut -int -I420Cut(uint8* frame, - int src_width, int src_height, - int dst_width, int dst_height); +// Convert Q420 to I420. +void Q420ToI420(const uint8* src_y, int src_pitch_y, + const uint8* src_yuy2, int src_pitch_yuy2, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); +// Convert NV12 to I420. Also used for NV21. +void NV12ToI420(const uint8* src_y, + const uint8* src_uv, int src_pitch, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); -} // namespace libyuv +// Convert YUY2 to I420. +void YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); +// Convert UYVY to I420. +void UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); -#endif // LIBYUV_INCLUDE_GENERAL_H_ +// Convert I420 to ARGB. +void I420ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert I420 to BGRA. +void I420ToBGRA(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert I420 to ABGR. +void I420ToABGR(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert I422 to ARGB. +void I422ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert I444 to ARGB. +void I444ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert I400 to ARGB. +void I400ToARGB(const uint8* src_y, int src_pitch_y, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert I400 to ARGB. +void I400ToARGB_Reference(const uint8* src_y, int src_pitch_y, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert RAW to ARGB. +void RAWToARGB(const uint8* src_raw, int src_pitch_raw, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert BG24 to ARGB. +void BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +// Convert ABGR to ARGB. +void ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); + +} // namespace libyuv + +#endif // LIBYUV_INCLUDE_PLANAR_FUNCTIONS_H_ diff --git a/include/planar_functions.h b/include/planar_functions.h index a078b6762..1a5b48380 100644 --- a/include/planar_functions.h +++ b/include/planar_functions.h @@ -16,112 +16,120 @@ namespace libyuv { -class PlanarFunctions { - public: +// Copy I420 to I420. +void I420Copy(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Copy I420 to I420. - static void I420Copy(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert I422 to I420. Used by MJPG. +void I422ToI420(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Convert I422 to I420. Used by MJPG. - static void I422ToI420(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert M420 to I420. +void M420ToI420(const uint8* src_m420, int src_pitch_m420, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Convert M420 to I420. - static void M420ToI420(const uint8* src_m420, int src_pitch_m420, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert Q420 to I420. +void Q420ToI420(const uint8* src_y, int src_pitch_y, + const uint8* src_yuy2, int src_pitch_yuy2, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Convert Q420 to I420. - static void Q420ToI420(const uint8* src_y, int src_pitch_y, - const uint8* src_yuy2, int src_pitch_yuy2, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert NV12 to I420. Also used for NV21. +void NV12ToI420(const uint8* src_y, + const uint8* src_uv, int src_pitch, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Convert NV12 to I420. Also used for NV21. - static void NV12ToI420(const uint8* src_y, - const uint8* src_uv, int src_pitch, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert YUY2 to I420. +void YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Convert YUY2 to I420. - static void YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert UYVY to I420. +void UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height); - // Convert UYVY to I420. - static void UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height); +// Convert I420 to ARGB. +void I420ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert I420 to ARGB. - static void I420ToARGB(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert I420 to BGRA. +void I420ToBGRA(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert I422 to ARGB. - static void I422ToARGB(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert I420 to ABGR. +void I420ToABGR(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert I444 to ARGB. - static void I444ToARGB(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert I422 to ARGB. +void I422ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert I400 to ARGB. - static void I400ToARGB(const uint8* src_y, int src_pitch_y, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert I444 to ARGB. +void I444ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert I400 to ARGB. - static void I400ToARGB_Reference(const uint8* src_y, int src_pitch_y, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert I400 to ARGB. +void I400ToARGB(const uint8* src_y, int src_pitch_y, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert RAW to ARGB. - static void RAWToARGB(const uint8* src_raw, int src_pitch_raw, +// Convert I400 to ARGB. +void I400ToARGB_Reference(const uint8* src_y, int src_pitch_y, uint8* dst_argb, int dst_pitch_argb, int width, int height); - // Convert BG24 to ARGB. - static void BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert RAW to ARGB. +void RAWToARGB(const uint8* src_raw, int src_pitch_raw, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - // Convert ABGR to ARGB. - static void ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr, - uint8* dst_argb, int dst_pitch_argb, - int width, int height); +// Convert BG24 to ARGB. +void BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); - DISALLOW_IMPLICIT_CONSTRUCTORS(PlanarFunctions); - }; +// Convert ABGR to ARGB. +void ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr, + uint8* dst_argb, int dst_pitch_argb, + int width, int height); } // namespace libyuv diff --git a/include/scale.h b/include/scale.h index 57b2c18b9..9cef9bce8 100644 --- a/include/scale.h +++ b/include/scale.h @@ -12,7 +12,6 @@ #ifndef LIBYUV_INCLUDE_SCALE_H_ #define LIBYUV_INCLUDE_SCALE_H_ - #include "basic_types.h" #if defined(_MSC_VER) @@ -21,43 +20,32 @@ #define ALIGN16(var) var __attribute__((aligned(16))) #endif +namespace libyuv { -namespace libyuv -{ +// Scales a YUV 4:2:0 image from the input width and height to the +// output width and height. If outh_offset is nonzero, the image is +// offset by that many pixels and stretched to (outh - outh_offset * 2) +// pixels high, instead of outh. +// If interpolate is not set, a simple nearest-neighbor algorithm is +// used. This produces basic (blocky) quality at the fastest speed. +// If interpolate is set, interpolation is used to produce a better +// quality image, at the expense of speed. +// Returns true if successful. +bool Scale(const uint8 *in, int32 inw, int32 inh, + uint8 *out, int32 outw, int32 outh, int32 outh_offset, + bool interpolate); -class YuvScaler { - public: - // Scales a YUV 4:2:0 image from the input width and height to the - // output width and height. If outh_offset is nonzero, the image is - // offset by that many pixels and stretched to (outh - outh_offset * 2) - // pixels high, instead of outh. - // If interpolate is not set, a simple nearest-neighbor algorithm is - // used. This produces basic (blocky) quality at the fastest speed. - // If interpolate is set, interpolation is used to produce a better - // quality image, at the expense of speed. - // Returns true if successful. - static bool Scale(const uint8 *in, int32 inw, int32 inh, - uint8 *out, int32 outw, int32 outh, int32 outh_offset, - bool interpolate); +// Same, but specified in terms of each plane location and stride. +bool Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV, + int32 istrideY, int32 istrideU, int32 istrideV, + int32 iwidth, int32 iheight, + uint8 *outY, uint8 *outU, uint8 *outV, + int32 ostrideY, int32 ostrideU, int32 ostrideV, + int32 owidth, int32 oheight, + bool interpolate); - // Same, but specified in terms of each plane location and stride. - static bool Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV, - int32 istrideY, int32 istrideU, int32 istrideV, - int32 iwidth, int32 iheight, - uint8 *outY, uint8 *outU, uint8 *outV, - int32 ostrideY, int32 ostrideU, int32 ostrideV, - int32 owidth, int32 oheight, - bool interpolate); - - // For testing, allow disabling of optimizations. - static void SetUseReferenceImpl(bool use) { use_reference_impl_ = use; } - - private: - - static bool use_reference_impl_; - - DISALLOW_IMPLICIT_CONSTRUCTORS(YuvScaler); -}; +// For testing, allow disabling of optimizations. +void SetUseReferenceImpl(bool use); } // namespace libyuv diff --git a/source/convert.cc b/source/convert.cc index 70821bc62..4e4394a09 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -37,11 +37,11 @@ void *memcpy_8(void * dest, const void * src, size_t n); int -Convert::I420ToRGB24(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToRGB24(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL) @@ -106,168 +106,13 @@ Convert::I420ToRGB24(const uint8* src_yplane, int src_ystride, return 0; } - -int -Convert::I420ToARGB(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) -{ - if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || - dst_frame == NULL){ - return -1; - } - - uint8* out1 = dst_frame; - uint8* out2 = out1 + dst_stride * 4; - const uint8 *y1,*y2, *u, *v; - y1 = src_yplane; - y2 = src_yplane + src_ystride; - u = src_uplane; - v = src_vplane; - int h, w; - int tmpR, tmpG, tmpB; - - for (h = ((src_height + 1) >> 1); h > 0; h--){ - // Do 2 rows at the time - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Vertical and horizontal sub-sampling - - tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8); - out1[0] = Clip(tmpB); - out1[1] = Clip(tmpG); - out1[2] = Clip(tmpR); - out1[3] = 0xff; - - tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8); - out1[4] = Clip(tmpB); - out1[5] = Clip(tmpG); - out1[6] = Clip(tmpR); - out1[7] = 0xff; - - tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8); - out2[0] = Clip(tmpB); - out2[1] = Clip(tmpG); - out2[2] = Clip(tmpR); - out2[3] = 0xff; - - tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8); - out2[4] = Clip(tmpB); - out2[5] = Clip(tmpG); - out2[6] = Clip(tmpR); - out2[7] = 0xff; - - out1 += 8; - out2 += 8; - y1 += 2; - y2 += 2; - u++; - v++; - } - y1 += 2 * src_ystride - src_width; - y2 += 2 * src_ystride - src_width; - u += src_ustride - ((src_width + 1) >> 1); - v += src_vstride - ((src_width + 1) >> 1); - out1 += (2 * dst_stride - src_width) * 4; - out2 += (2 * dst_stride - src_width) * 4; - } // end height for - return 0; -} - - -int -Convert::I420ToBGRA(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) -{ - if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || - dst_frame == NULL){ - return -1; - } - - uint8* out1 = dst_frame; - uint8* out2 = out1 + dst_stride * 4; - const uint8 *y1,*y2, *u, *v; - y1 = src_yplane; - y2 = src_yplane + src_ystride; - u = src_uplane; - v = src_vplane; - int h, w; - int tmpR, tmpG, tmpB; - - for (h = ((src_height + 1) >> 1); h > 0; h--){ - // Do 2 rows at the time - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Vertical and horizontal sub-sampling - - tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8); - out1[0] = 0xff; - out1[1] = Clip(tmpR); - out1[2] = Clip(tmpG); - out1[3] = Clip(tmpB); - - tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8); - out1[4] = 0xff; - out1[5] = Clip(tmpR); - out1[6] = Clip(tmpG); - out1[7] = Clip(tmpB); - - tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8); - out2[0] = 0xff; - out2[1] = Clip(tmpR); - out2[2] = Clip(tmpG); - out2[3] = Clip(tmpB); - - tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8); - tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); - tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8); - out2[4] = 0xff; - out2[5] = Clip(tmpR); - out2[6] = Clip(tmpG); - out2[7] = Clip(tmpB); - - out1 += 8; - out2 += 8; - y1 += 2; - y2 += 2; - u++; - v++; - } - y1 += 2 * src_ystride - src_width; - y2 += 2 * src_ystride - src_width; - u += src_ustride - ((src_width + 1) >> 1); - v += src_vstride - ((src_width + 1) >> 1); - out1 += (2 * dst_stride - src_width) * 4; - out2 += (2 * dst_stride - src_width) * 4; - } // end height for - return 0; -} - - // Little Endian... int -Convert::I420ToARGB4444(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToARGB4444(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL){ @@ -332,11 +177,11 @@ Convert::I420ToARGB4444(const uint8* src_yplane, int src_ystride, int -Convert::I420ToRGB565(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToRGB565(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL){ @@ -414,11 +259,11 @@ Convert::I420ToRGB565(const uint8* src_yplane, int src_ystride, int -Convert::I420ToARGB1555(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToARGB1555(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL){ @@ -485,11 +330,11 @@ Convert::I420ToARGB1555(const uint8* src_yplane, int src_ystride, int -Convert::I420ToYUY2(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToYUY2(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL){ @@ -601,11 +446,11 @@ Convert::I420ToYUY2(const uint8* src_yplane, int src_ystride, } int -Convert::I420ToUYVY(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToUYVY(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL){ @@ -717,10 +562,10 @@ loop0: int -Convert::NV12ToRGB565(const uint8* src_yplane, int src_ystride, - const uint8* src_uvplane, int src_uvstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +NV12ToRGB565(const uint8* src_yplane, int src_ystride, + const uint8* src_uvplane, int src_uvstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uvplane == NULL || dst_frame == NULL){ return -1; @@ -788,11 +633,11 @@ Convert::NV12ToRGB565(const uint8* src_yplane, int src_ystride, } int -Convert::I420ToABGR(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +I420ToABGR(const uint8* src_yplane, int src_ystride, + const uint8* src_uplane, int src_ustride, + const uint8* src_vplane, int src_vstride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL || dst_frame == NULL){ @@ -872,56 +717,10 @@ Convert::I420ToABGR(const uint8* src_yplane, int src_ystride, return 0; } - int -Convert::UYVYToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height) -{ - if (dst_yplane == NULL || dst_uplane == NULL || dst_vplane == NULL || - src_frame == NULL){ - return -1; - } - - int i, j; - uint8* outI = dst_yplane; - uint8* outU = dst_uplane; - uint8* outV = dst_vplane; - - // U0Y0V0Y1..U2Y2V2Y3..... - - for (i = 0; i < ((src_height + 1) >> 1); i++){ - for (j = 0; j < ((src_width + 1) >> 1); j++){ - outI[0] = src_frame[1]; - *outU = src_frame[0]; - outI[1] = src_frame[3]; - *outV = src_frame[2]; - src_frame += 4; - outI += 2; - outU++; - outV++; - } - for (j = 0; j < ((src_width + 1) >> 1); j++) - { - outI[0] = src_frame[1]; - outI[1] = src_frame[3]; - src_frame += 4; - outI += 2; - } - outI += dst_ystride - src_width; - outU += dst_ustride - ((src_width + 1) << 1); - outV += dst_vstride - ((src_width + 1) << 1); - } - return 0; -} - - -int -Convert::RGB24ToARGB(const uint8* src_frame, int src_stride, - uint8* dst_frame, int dst_stride, - int src_width, int src_height) +RGB24ToARGB(const uint8* src_frame, int src_stride, + uint8* dst_frame, int dst_stride, + int src_width, int src_height) { if (src_frame == NULL || dst_frame == NULL){ return -1; @@ -936,7 +735,7 @@ Convert::RGB24ToARGB(const uint8* src_frame, int src_stride, { for (j = 0; j < src_width; j++) { - offset = j*4; + offset = j * 4; outFrame[0 + offset] = inFrame[0]; outFrame[1 + offset] = inFrame[1]; outFrame[2 + offset] = inFrame[2]; @@ -949,326 +748,190 @@ Convert::RGB24ToARGB(const uint8* src_frame, int src_stride, return 0; } +// ARGBToI420Row_C etc row functions use the following macro, generating +// code with RGB offsets/strides different for each version. Less error +// prone than duplicating the code. +// template could be used, but macro method works for C and asm and this is +// performance critical code. -int -Convert::RGB24ToI420(const uint8* src_frame, int src_stride, +#define MAKEROWRGBTOI420(NAME,R,G,B,BPP) \ +static void \ +NAME(const uint8* src_row0, const uint8* src_row1, \ + uint8* dst_yplane0, uint8* dst_yplane1, \ + uint8* dst_uplane, \ + uint8* dst_vplane, \ + int src_width) { \ + for (int x = 0; x < src_width - 1; x += 2) { \ + dst_yplane0[0] = (uint8)((src_row0[R] * 66 + \ + src_row0[G] * 129 + \ + src_row0[B] * 25 + 128) >> 8) + 16; \ + dst_yplane0[1] = (uint8)((src_row0[R + BPP] * 66 + \ + src_row0[G + BPP] * 129 + \ + src_row0[B + BPP] * 25 + 128) >> 8) + 16; \ + dst_yplane1[0] = (uint8)((src_row1[R] * 66 + \ + src_row1[G] * 129 + \ + src_row1[B] * 25 + 128) >> 8) + 16; \ + dst_yplane1[1] = (uint8)((src_row1[R + BPP] * 66 + \ + src_row1[G + BPP] * 129 + \ + src_row1[B + BPP] * 25 + 128) >> 8) + 16; \ + dst_uplane[0] = (uint8)(((src_row0[R] + src_row0[R + BPP] + \ + src_row1[R] + src_row1[R + BPP]) * -38 + \ + (src_row0[G] + src_row0[G + BPP] + \ + src_row1[G] + src_row1[G + BPP]) * -74 + \ + (src_row0[B] + src_row0[B + BPP] + \ + src_row1[B] + src_row1[B + BPP]) * 112 + \ + + 512) >> 10) + 128; \ + dst_vplane[0] = (uint8)(((src_row0[R] + src_row0[R + BPP] + \ + src_row1[R] + src_row1[R + BPP]) * 112 + \ + (src_row0[G] + src_row0[G + BPP] + \ + src_row1[G] + src_row1[G + BPP]) * -94 + \ + (src_row0[B] + src_row0[B + BPP] + \ + src_row1[B] + src_row1[B + BPP]) * -18 + \ + + 512) >> 10) + 128; \ + dst_yplane0 += 2; \ + dst_yplane1 += 2; \ + ++dst_uplane; \ + ++dst_vplane; \ + src_row0 += BPP * 2; \ + src_row1 += BPP * 2; \ + } \ + if (src_width & 1) { \ + dst_yplane0[0] = (uint8)((src_row0[R] * 66 + \ + src_row0[G] * 129 + \ + src_row0[B] * 25 + 128) >> 8) + 16; \ + dst_yplane1[0] = (uint8)((src_row1[R] * 66 + \ + src_row1[G] * 129 + \ + src_row1[B] * 25 + 128) >> 8) + 16; \ + dst_uplane[0] = (uint8)(((src_row0[R] + \ + src_row1[R]) * -38 + \ + (src_row0[G] + \ + src_row1[G]) * -74 + \ + (src_row0[B] + \ + src_row1[B]) * 112 + \ + + 256) >> 9) + 128; \ + dst_vplane[0] = (uint8)(((src_row0[R] + \ + src_row1[R]) * 112 + \ + (src_row0[G] + \ + src_row1[G]) * -94 + \ + (src_row0[B] + \ + src_row1[B]) * -18 + \ + + 256) >> 9) + 128; \ + } \ +} + +// Generate variations of RGBToI420. Parameters are r,g,b offsets within a +// pixel, and number of bytes per pixel. +MAKEROWRGBTOI420(ARGBToI420Row_C, 2, 1, 0, 4) +MAKEROWRGBTOI420(BGRAToI420Row_C, 1, 2, 3, 4) +MAKEROWRGBTOI420(ABGRToI420Row_C, 0, 1, 2, 4) +MAKEROWRGBTOI420(RGB24ToI420Row_C, 2, 1, 0, 3) +MAKEROWRGBTOI420(RAWToI420Row_C, 0, 1, 2, 3) + +static int RGBToI420(const uint8* src_frame, int src_stride, uint8* dst_yplane, int dst_ystride, uint8* dst_uplane, int dst_ustride, uint8* dst_vplane, int dst_vstride, - int src_width, int src_height) -{ + int src_width, int src_height, + void (*RGBToI420Row)(const uint8* src_row0, + const uint8* src_row1, + uint8* dst_yplane0, + uint8* dst_yplane1, + uint8* dst_uplane, + uint8* dst_vplane, + int src_width)) { if (src_frame == NULL || dst_yplane == NULL || - dst_vplane == NULL || dst_vplane == NULL) - return -1; - - uint8* yStartPtr; - uint8* yStartPtr2; - uint8* uStartPtr; - uint8* vStartPtr; - const uint8* inpPtr; - const uint8* inpPtr2; - int h, w; - - yStartPtr = dst_yplane; - yStartPtr2 = yStartPtr + dst_ystride; - uStartPtr = dst_uplane; - vStartPtr = dst_vplane; - inpPtr = src_frame + src_stride * src_height * 3 - 3 * src_height; - inpPtr2 = inpPtr - 3 * src_stride; - - for (h = 0; h < ((src_height + 1) >> 1); h++ ){ - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Y - yStartPtr[0] = (uint8)((66 * inpPtr[2] + 129 * inpPtr[1] - + 25 * inpPtr[0] + 128) >> 8) + 16; - yStartPtr2[0] = (uint8)((66 * inpPtr2[2] + 129 * inpPtr2[1] - + 25 * inpPtr2[0] + 128) >> 8) + 16; - // Moving to next column - yStartPtr[1] = (uint8)((66 * inpPtr[5] + 129 * inpPtr[4] - + 25 * inpPtr[3] + 128) >> 8) + 16; - yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[4] - + 25 * inpPtr2[3] + 128) >> 8 ) + 16; - // U - uStartPtr[0] = (uint8)((-38 * inpPtr[2] - 74 * inpPtr[1] + - 112 * inpPtr[0] + 128) >> 8) + 128; - // V - vStartPtr[0] = (uint8)((112 * inpPtr[2] -94 * inpPtr[1] - - 18 * inpPtr[0] + 128) >> 8) + 128; - - yStartPtr += 2; - yStartPtr2 += 2; - uStartPtr++; - vStartPtr++; - inpPtr += 6; - inpPtr2 += 6; - } // end for w - yStartPtr += dst_ystride + dst_ystride - src_width; - yStartPtr2 += dst_ystride + dst_ystride - src_width; - uStartPtr += dst_ustride + dst_ustride - ((src_width + 1) >> 1); - vStartPtr += dst_vstride + dst_vstride - ((src_width + 1) >> 1); - inpPtr -= 3 * (2 * src_stride + src_width); - inpPtr2 -= 3 * (2 * src_stride + src_width); - } // end for h - return 0; -} - -int -Convert::RAWToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height) -{ - if (src_frame == NULL || dst_yplane == NULL || - dst_vplane == NULL || dst_vplane == NULL) - return -1; - - uint8* yStartPtr; - uint8* yStartPtr2; - uint8* uStartPtr; - uint8* vStartPtr; - const uint8* inpPtr; - const uint8* inpPtr2; - int h, w; - - yStartPtr = dst_yplane; - yStartPtr2 = yStartPtr + dst_ystride; - uStartPtr = dst_uplane; - vStartPtr = dst_vplane; - inpPtr = src_frame + src_stride * src_height * 3 - 3 * src_height; - inpPtr2 = inpPtr - 3 * src_stride; - - // Same as RGB24 - reverse ordering - - for (h = 0; h < ((src_height + 1) >> 1); h++){ - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Y - yStartPtr[0] = (uint8)((66 * inpPtr[0] + 129 * inpPtr[1] - + 25 * inpPtr[2] + 128) >> 8) + 16; - yStartPtr2[0] = (uint8)((66 * inpPtr2[2] + 129 * inpPtr2[1] - + 25 * inpPtr2[0] + 128) >> 8) + 16; - // Moving to next column - yStartPtr[1] = (uint8)((66 * inpPtr[3] + 129 * inpPtr[4] - + 25 * inpPtr[5] + 128) >> 8) + 16; - yStartPtr2[1] = (uint8)((66 * inpPtr2[3] + 129 * inpPtr2[4] - + 25 * inpPtr2[5] + 128) >> 8 ) + 16; - // U - uStartPtr[0] = (uint8)((-38 * inpPtr[0] - 74 * inpPtr[1] + - 112 * inpPtr[2] + 128) >> 8) + 128; - // V - vStartPtr[0] = (uint8)((112 * inpPtr[0] -94 * inpPtr[1] - - 18 * inpPtr[2] + 128) >> 8) + 128; - - yStartPtr += 2; - yStartPtr2 += 2; - uStartPtr++; - vStartPtr++; - inpPtr += 6; - inpPtr2 += 6; - } // end for w - yStartPtr += dst_ystride + dst_ystride - src_width; - yStartPtr2 += dst_ystride + dst_ystride - src_width; - uStartPtr += dst_ustride + dst_ustride - ((src_width + 1) >> 1); - vStartPtr += dst_vstride + dst_vstride - ((src_width + 1) >> 1); - inpPtr -= 3 * (2 * src_stride + src_width); - inpPtr2 -= 3 * (2 * src_stride + src_width); - } // end for h - return 0; -} - - -int -Convert::BGRAToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height) -{ - if (src_frame == NULL || dst_yplane == NULL || - dst_vplane == NULL || dst_vplane == NULL) - return -1; - - uint8* yStartPtr; - uint8* yStartPtr2; - uint8* uStartPtr; - uint8* vStartPtr; - const uint8* inpPtr; - const uint8* inpPtr2; - int h, w; - - // Assuming RGB in a bottom up orientation. - yStartPtr = dst_yplane; - yStartPtr2 = yStartPtr + dst_ystride; - uStartPtr = dst_uplane; - vStartPtr = dst_vplane; - inpPtr = src_frame + src_stride * src_height * 3 - 3 * src_height; - inpPtr2 = inpPtr - 3 * src_stride; - - for (h = 0; h < ((src_height + 1) >> 1); h++ ){ - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Y - yStartPtr[0] = (uint8)((66 * inpPtr[1] + 129 * inpPtr[2] - + 25 * inpPtr[3] + 128) >> 8) + 16; - yStartPtr2[0] = (uint8)((66 * inpPtr2[1] + 129 * inpPtr2[2] - + 25 * inpPtr2[3] + 128) >> 8) + 16; - // Moving to next column - yStartPtr[1] = (uint8)((66 * inpPtr[5] + 129 * inpPtr[6] - + 25 * inpPtr[7] + 128) >> 8) + 16; - yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[6] - + 25 * inpPtr2[7] + 128) >> 8 ) + 16; - // U - uStartPtr[0] = (uint8)((-38 * inpPtr[1] - 74 * inpPtr[2] + - 112 * inpPtr[3] + 128) >> 8) + 128; - // V - vStartPtr[0] = (uint8)((112 * inpPtr[1] -94 * inpPtr[2] - - 18 * inpPtr[3] + 128) >> 8) + 128; - - yStartPtr += 2; - yStartPtr2 += 2; - uStartPtr++; - vStartPtr++; - inpPtr += 6; - inpPtr2 += 6; - } // end for w - yStartPtr += dst_ystride + dst_ystride - src_width; - yStartPtr2 += dst_ystride + dst_ystride - src_width; - uStartPtr += dst_ustride + dst_ustride - ((src_width + 1) >> 1); - vStartPtr += dst_vstride + dst_vstride - ((src_width + 1) >> 1); - inpPtr -= 3 * (2 * src_stride + src_width); - inpPtr2 -= 3 * (2 * src_stride + src_width); - } // end for h - return 0; -} - - -int -Convert::ARGBToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height) -{ - if (src_frame == NULL || dst_yplane == NULL || - dst_vplane == NULL || dst_vplane == NULL) - return -1; - - uint8* yStartPtr; - uint8* yStartPtr2; - uint8* uStartPtr; - uint8* vStartPtr; - const uint8* inpPtr; - const uint8* inpPtr2; - int h, w; - - yStartPtr = dst_yplane; - yStartPtr2 = yStartPtr + dst_ystride; - uStartPtr = dst_uplane; - vStartPtr = dst_vplane; - inpPtr = src_frame + src_stride * src_height * 3 - 3 * src_height; - inpPtr2 = inpPtr - 3 * src_stride; - - for (h = 0; h < ((src_height + 1) >> 1); h++ ){ - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Y - yStartPtr[0] = (uint8)((66 * inpPtr[2] + 129 * inpPtr[1] - + 25 * inpPtr[0] + 128) >> 8) + 16; - yStartPtr2[0] = (uint8)((66 * inpPtr2[2] + 129 * inpPtr2[1] - + 25 * inpPtr2[0] + 128) >> 8) + 16; - // Moving to next column - yStartPtr[1] = (uint8)((66 * inpPtr[6] + 129 * inpPtr[5] - + 25 * inpPtr[4] + 128) >> 8) + 16; - yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[4] - + 25 * inpPtr2[3] + 128) >> 8 ) + 16; - // U - uStartPtr[0] = (uint8)((-38 * inpPtr[2] - 74 * inpPtr[1] + - 112 * inpPtr[0] + 128) >> 8) + 128; - // V - vStartPtr[0] = (uint8)((112 * inpPtr[2] -94 * inpPtr[1] - - 18 * inpPtr[0] + 128) >> 8) + 128; - - yStartPtr += 2; - yStartPtr2 += 2; - uStartPtr++; - vStartPtr++; - inpPtr += 6; - inpPtr2 += 6; - } // end for w - yStartPtr += dst_ystride + dst_ystride - src_width; - yStartPtr2 += dst_ystride + dst_ystride - src_width; - uStartPtr += dst_ustride + dst_ustride - ((src_width + 1) >> 1); - vStartPtr += dst_vstride + dst_vstride - ((src_width + 1) >> 1); - inpPtr -= 3 * (2 * src_stride + src_width); - inpPtr2 -= 3 * (2 * src_stride + src_width); - } // end for h - return 0; -} - - -int -Convert::ABGRToI420(const uint8* src_frame, int src_stride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int src_width, int src_height) -{ - if (src_frame == NULL || dst_yplane == NULL || - dst_vplane == NULL || dst_vplane == NULL){ + dst_vplane == NULL || dst_vplane == NULL) { return -1; } - - uint8* yStartPtr; - uint8* yStartPtr2; - uint8* uStartPtr; - uint8* vStartPtr; - const uint8* inpPtr; - const uint8* inpPtr2; - - yStartPtr = dst_yplane; - yStartPtr2 = yStartPtr + dst_ystride; - uStartPtr = dst_uplane; - vStartPtr = dst_vplane; - inpPtr = src_frame; - inpPtr2 = inpPtr + 4 * src_stride; - int h, w; - - // RGBA in memory - for (h = 0; h < ((src_height + 1) >> 1); h++){ - for (w = 0; w < ((src_width + 1) >> 1); w++){ - // Y - yStartPtr[0] = (uint8)((66 * inpPtr[0] + 129 * inpPtr[1] - + 25 * inpPtr[2] + 128) >> 8) + 16; - yStartPtr2[0] = (uint8)((66 * inpPtr2[0] + 129 * inpPtr2[1] - + 25 * inpPtr2[2] + 128) >> 8) + 16; - // Moving to next column - yStartPtr[1] = (uint8)((66 * inpPtr[4] + 129 * inpPtr[5] - + 25 * inpPtr[6] + 128) >> 8) + 16; - yStartPtr2[1] = (uint8)((66 * inpPtr2[4] + 129 * inpPtr2[5] - + 25 * inpPtr2[6] + 128) >> 8) + 16; - // U - uStartPtr[0] = (uint8)((-38 * inpPtr[0] - 74 * inpPtr[1] - + 112 * inpPtr[2] + 128) >> 8) + 128; - // V - vStartPtr[0] = (uint8)((112 * inpPtr[0] - 94 * inpPtr[1] - - 18 * inpPtr[2] + 128) >> 8) + 128; - - yStartPtr += 2; - yStartPtr2 += 2; - uStartPtr++; - vStartPtr++; - inpPtr += 8; - inpPtr2 += 8; - } - - yStartPtr += 2 * dst_ystride - src_width; - yStartPtr2 += 2 * dst_ystride - src_width; - uStartPtr += dst_ustride + dst_ustride - ((src_width + 1) >> 1); - vStartPtr += dst_vstride + dst_vstride - ((src_width + 1) >> 1); - inpPtr += 4 * (2 * src_stride - src_width); - inpPtr2 += 4 * (2 * src_stride - src_width); + if (src_height < 0) { + src_height = -src_height; + src_frame = src_frame + src_stride * (src_height -1); + src_stride = -src_stride; + } + for (int y = 0; y < src_height - 1; y += 2) { + RGBToI420Row(src_frame, src_frame + src_stride, + dst_yplane, dst_yplane + dst_ystride, + dst_uplane, dst_vplane, + src_width); + src_frame += src_stride * 2; + dst_yplane += dst_ystride * 2; + dst_uplane += dst_ustride; + dst_vplane += dst_vstride; + } + if (src_height & 1) { + RGBToI420Row(src_frame, src_frame, + dst_yplane, dst_yplane, + dst_uplane, dst_vplane, + src_width); } return 0; } +int +ARGBToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height) { + return RGBToI420(src_frame, src_stride, + dst_yplane, dst_ystride, + dst_uplane, dst_ustride, + dst_vplane, dst_vstride, + src_width, src_height, ARGBToI420Row_C); +} + +int +BGRAToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height) { + return RGBToI420(src_frame, src_stride, + dst_yplane, dst_ystride, + dst_uplane, dst_ustride, + dst_vplane, dst_vstride, + src_width, src_height, BGRAToI420Row_C); +} + +int +ABGRToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height) { + return RGBToI420(src_frame, src_stride, + dst_yplane, dst_ystride, + dst_uplane, dst_ustride, + dst_vplane, dst_vstride, + src_width, src_height, ABGRToI420Row_C); +} + +int +RGB24ToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height) { + return RGBToI420(src_frame, src_stride, + dst_yplane, dst_ystride, + dst_uplane, dst_ustride, + dst_vplane, dst_vstride, + src_width, src_height, RGB24ToI420Row_C); +} + +int +RAWToI420(const uint8* src_frame, int src_stride, + uint8* dst_yplane, int dst_ystride, + uint8* dst_uplane, int dst_ustride, + uint8* dst_vplane, int dst_vstride, + int src_width, int src_height) { + return RGBToI420(src_frame, src_stride, + dst_yplane, dst_ystride, + dst_uplane, dst_ustride, + dst_vplane, dst_vstride, + src_width, src_height, RAWToI420Row_C); +} + inline uint8 Clip(int32 val) { @@ -1328,4 +991,3 @@ void #endif } // namespace libyuv - diff --git a/source/format_conversion.cc b/source/format_conversion.cc index a7e1f3c60..828015424 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -8,9 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - -#include "format_conversion.h" - #include #include "common.h" @@ -19,6 +16,9 @@ namespace libyuv { +// Most code in here is inspired by the material at +// http://www.siliconimaging.com/RGB%20Bayer.htm + enum { RED = 0, BLUE = 1, @@ -274,6 +274,65 @@ static FORCE_INLINE void InterpolateBayerRGBCenter(uint8* r, } } +// Converts any Bayer RGB format to ARGB. +void BayerRGBToARGB(const uint8* src, int src_pitch, uint32 src_fourcc, + uint8* dst, int dst_pitch, + int width, int height) { + assert(width % 2 == 0); + assert(height % 2 == 0); + + uint32 colour_map = FourCcToBayerPixelColourMap(src_fourcc); + int src_row_inc = src_pitch * 2 - width; + int dst_row_inc = dst_pitch * 2 - width * 4; + + // Iterate over the 2x2 grids. + for (int y1 = 0; y1 < height; y1 += 2) { + for (int x1 = 0; x1 < width; x1 += 2) { + uint32 colours = colour_map; + // Iterate over the four pixels within them. + for (int y2 = 0; y2 < 2; ++y2) { + for (int x2 = 0; x2 < 2; ++x2) { + uint8 r, g, b; + // The low-order byte of the colour map is the current colour. + uint8 current_colour = static_cast(colours); + colours >>= 8; + Position pos = GetPosition(x1 + x2, y1 + y2, width, height); + const uint8* src_pixel = &src[y2 * src_pitch + x2]; + const uint8* dst_pixel = &dst[y2 * dst_pitch + x2 * 4]; + + // Convert from Bayer RGB to regular RGB. + if (pos == MIDDLE) { + // 99% of the image is the middle. + InterpolateBayerRGBCenter(&r, &g, &b, + src_pixel, src_pitch, + current_colour); + } else if (pos >= LEFT_EDGE) { + // Next most frequent is edges. + InterpolateBayerRGBEdge(&r, &g, &b, + src_pixel, src_pitch, pos, + current_colour); + } else { + // Last is the corners. There are only 4. + InterpolateBayerRGBCorner(&r, &g, &b, + src_pixel, src_pitch, pos, + current_colour); + } + + // Store ARGB + dst[0] = b; + dst[1] = g; + dst[2] = r; + dst[3] = 255u; + } + } + src += 2; + dst += 2 * 4; + } + src += src_row_inc; + dst += dst_row_inc; + } +} + // Converts any Bayer RGB format to I420. void BayerRGBToI420(const uint8* src, int src_pitch, uint32 src_fourcc, uint8* y, int y_pitch, @@ -430,17 +489,16 @@ static uint32 GenerateSelector(int select0, int select1) { static_cast((select1 + 12) << 24); } -// Converts any 32 bit ARGB to any Bayer RGB format. -void RGB32ToBayerRGB(const uint8* src_rgb, int src_pitch_rgb, - uint32 src_fourcc_rgb, - uint8* dst_bayer, int dst_pitch_bayer, - uint32 dst_fourcc_bayer, - int width, int height) { +// Converts 32 bit ARGB to any Bayer RGB format. +void ARGBToBayerRGB(const uint8* src_rgb, int src_pitch_rgb, + uint8* dst_bayer, int dst_pitch_bayer, + uint32 dst_fourcc_bayer, + int width, int height) { assert(width % 2 == 0); void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, uint32 selector, int pix); #if defined(HAS_ARGBTOBAYERROW_SSSE3) - if (CpuInfo::TestCpuFlag(CpuInfo::kCpuHasSSSE3) && + if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSSE3) && (width % 4 == 0) && IS_ALIGNED(src_rgb, 16) && (src_pitch_rgb % 16 == 0) && IS_ALIGNED(dst_bayer, 4) && (dst_pitch_bayer % 4 == 0)) { @@ -451,7 +509,6 @@ void RGB32ToBayerRGB(const uint8* src_rgb, int src_pitch_rgb, ARGBToBayerRow = ARGBToBayerRow_C; } - assert(src_fourcc_rgb == FOURCC_ARGB); int blue_index = 0; int green_index = 1; int red_index = 2; diff --git a/source/planar_functions.cc b/source/planar_functions.cc index f815e7c53..8b0a06111 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -249,13 +249,13 @@ static void I420CopyPlane2(const uint8* src, int src_pitch_0, int src_pitch_1, // Helper function to copy yuv data without scaling. Used // by our jpeg conversion callbacks to incrementally fill a yuv image. -void PlanarFunctions::I420Copy(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void I420Copy(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { // Negative height means invert the image. if (height < 0) { height = -height; @@ -276,13 +276,13 @@ void PlanarFunctions::I420Copy(const uint8* src_y, int src_pitch_y, // Helper function to copy yuv data without scaling. Used // by our jpeg conversion callbacks to incrementally fill a yuv image. -void PlanarFunctions::I422ToI420(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void I422ToI420(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { // Negative height means invert the image. if (height < 0) { height = -height; @@ -399,11 +399,11 @@ static void X420ToI420(const uint8* src_y, } // Convert M420 to I420. -void PlanarFunctions::M420ToI420(const uint8* src_m420, int src_pitch_m420, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void M420ToI420(const uint8* src_m420, int src_pitch_m420, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { X420ToI420(src_m420, src_pitch_m420, src_pitch_m420 * 2, src_m420 + src_pitch_m420 * 2, src_pitch_m420 * 3, dst_y, dst_pitch_y, dst_u, dst_pitch_u, dst_v, dst_pitch_v, @@ -411,13 +411,13 @@ void PlanarFunctions::M420ToI420(const uint8* src_m420, int src_pitch_m420, } // Convert NV12 to I420. -void PlanarFunctions::NV12ToI420(const uint8* src_y, - const uint8* src_uv, - int src_pitch, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void NV12ToI420(const uint8* src_y, + const uint8* src_uv, + int src_pitch, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { X420ToI420(src_y, src_pitch, src_pitch, src_uv, src_pitch, dst_y, dst_pitch_y, dst_u, dst_pitch_u, dst_v, dst_pitch_v, @@ -543,12 +543,12 @@ static void SplitYUY2_C(const uint8* src_yuy2, // Convert Q420 to I420. // Format is rows of YY/YUYV -void PlanarFunctions::Q420ToI420(const uint8* src_y, int src_pitch_y, - const uint8* src_yuy2, int src_pitch_yuy2, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void Q420ToI420(const uint8* src_y, int src_pitch_y, + const uint8* src_yuy2, int src_pitch_yuy2, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { void (*SplitYUY2)(const uint8* src_yuy2, uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix); #if defined(HAS_SPLITYUY2_SSE2) @@ -921,11 +921,11 @@ void UYVYToI420RowY_C(const uint8* src_uyvy, } // Convert YUY2 to I420. -void PlanarFunctions::YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { void (*YUY2ToI420RowUV)(const uint8* src_yuy2, int src_pitch_yuy2, uint8* dst_u, uint8* dst_v, int pix); void (*YUY2ToI420RowY)(const uint8* src_yuy2, @@ -961,11 +961,11 @@ void PlanarFunctions::YUY2ToI420(const uint8* src_yuy2, int src_pitch_yuy2, } // Convert UYVY to I420. -void PlanarFunctions::UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy, - uint8* dst_y, int dst_pitch_y, - uint8* dst_u, int dst_pitch_u, - uint8* dst_v, int dst_pitch_v, - int width, int height) { +void UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy, + uint8* dst_y, int dst_pitch_y, + uint8* dst_u, int dst_pitch_u, + uint8* dst_v, int dst_pitch_v, + int width, int height) { void (*UYVYToI420RowUV)(const uint8* src_uyvy, int src_pitch_uyvy, uint8* dst_u, uint8* dst_v, int pix); void (*UYVYToI420RowY)(const uint8* src_uyvy, @@ -1002,11 +1002,11 @@ void PlanarFunctions::UYVYToI420(const uint8* src_uyvy, int src_pitch_uyvy, // Convert I420 to ARGB. // TODO(fbarchard): Add SSSE3 version and supply C version for fallback. -void PlanarFunctions::I420ToARGB(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void I420ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { FastConvertYUVToRGB32Row(src_y, src_u, src_v, dst_argb, width); dst_argb += dst_pitch_argb; @@ -1020,12 +1020,48 @@ void PlanarFunctions::I420ToARGB(const uint8* src_y, int src_pitch_y, EMMS(); } +// Convert I420 to BGRA. +void I420ToBGRA(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { + for (int y = 0; y < height; ++y) { + FastConvertYUVToBGRARow(src_y, src_u, src_v, dst_argb, width); + dst_argb += dst_pitch_argb; + src_y += src_pitch_y; + if (y & 1) { + src_u += src_pitch_u; + src_v += src_pitch_v; + } + } + EMMS(); +} + +// Convert I420 to BGRA. +void I420ToABGR(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { + for (int y = 0; y < height; ++y) { + FastConvertYUVToABGRRow(src_y, src_u, src_v, dst_argb, width); + dst_argb += dst_pitch_argb; + src_y += src_pitch_y; + if (y & 1) { + src_u += src_pitch_u; + src_v += src_pitch_v; + } + } + EMMS(); +} + // Convert I422 to ARGB. -void PlanarFunctions::I422ToARGB(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void I422ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { FastConvertYUVToRGB32Row(src_y, src_u, src_v, dst_argb, width); dst_argb += dst_pitch_argb; @@ -1038,11 +1074,11 @@ void PlanarFunctions::I422ToARGB(const uint8* src_y, int src_pitch_y, } // Convert I444 to ARGB. -void PlanarFunctions::I444ToARGB(const uint8* src_y, int src_pitch_y, - const uint8* src_u, int src_pitch_u, - const uint8* src_v, int src_pitch_v, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void I444ToARGB(const uint8* src_y, int src_pitch_y, + const uint8* src_u, int src_pitch_u, + const uint8* src_v, int src_pitch_v, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { FastConvertYUV444ToRGB32Row(src_y, src_u, src_v, dst_argb, width); dst_argb += dst_pitch_argb; @@ -1055,9 +1091,9 @@ void PlanarFunctions::I444ToARGB(const uint8* src_y, int src_pitch_y, } // Convert I400 to ARGB. -void PlanarFunctions::I400ToARGB_Reference(const uint8* src_y, int src_pitch_y, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void I400ToARGB_Reference(const uint8* src_y, int src_pitch_y, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { FastConvertYToRGB32Row(src_y, dst_argb, width); dst_argb += dst_pitch_argb; @@ -1147,9 +1183,9 @@ static void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) { } // Convert I400 to ARGB. -void PlanarFunctions::I400ToARGB(const uint8* src_y, int src_pitch_y, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void I400ToARGB(const uint8* src_y, int src_pitch_y, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix); #if defined(HAS_I400TOARGBROW_SSE2) if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) && @@ -1182,9 +1218,9 @@ static void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix) { } // Convert RAW to ARGB. -void PlanarFunctions::RAWToARGB(const uint8* src_raw, int src_pitch_raw, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void RAWToARGB(const uint8* src_raw, int src_pitch_raw, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { RAWToARGBRow_C(src_raw, dst_argb, width); src_raw += src_pitch_raw; @@ -1204,9 +1240,9 @@ static void BG24ToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix) { } // Convert BG24 to ARGB. -void PlanarFunctions::BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void BG24ToARGB(const uint8* src_bg24, int src_pitch_bg24, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { BG24ToARGBRow_C(src_bg24, dst_argb, width); src_bg24 += src_pitch_bg24; @@ -1226,9 +1262,9 @@ static void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix) { } // Convert ABGR to ARGB. -void PlanarFunctions::ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr, - uint8* dst_argb, int dst_pitch_argb, - int width, int height) { +void ABGRToARGB(const uint8* src_abgr, int src_pitch_abgr, + uint8* dst_argb, int dst_pitch_argb, + int width, int height) { for (int y = 0; y < height; ++y) { ABGRToARGBRow_C(src_abgr, dst_argb, width); src_abgr += src_pitch_abgr; diff --git a/source/row.h b/source/row.h index 48bb8440e..67119b553 100644 --- a/source/row.h +++ b/source/row.h @@ -14,14 +14,24 @@ #include "basic_types.h" extern "C" { -// Can only do 1x. -// This is the second fastest of the scalers. void FastConvertYUVToRGB32Row(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, uint8* rgb_buf, int width); +void FastConvertYUVToBGRARow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + +void FastConvertYUVToABGRRow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + void FastConvertYUV444ToRGB32Row(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -39,8 +49,12 @@ void FastConvertYToRGB32Row(const uint8* y_buf, #endif #ifdef OSX extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]); +extern SIMD_ALIGNED(const int16 kCoefficientsBgraY[768][4]); +extern SIMD_ALIGNED(const int16 kCoefficientsAbgrY[768][4]); #else extern SIMD_ALIGNED(const int16 _kCoefficientsRgbY[768][4]); +extern SIMD_ALIGNED(const int16 _kCoefficientsBgraY[768][4]); +extern SIMD_ALIGNED(const int16 _kCoefficientsAbgrY[768][4]); #endif // Method to force C version. diff --git a/source/row_posix.cc b/source/row_posix.cc index 44c89dabd..999e8ce1a 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -55,6 +55,84 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi ); } +void FastConvertYUVToBGRARow(const uint8* y_buf, // rdi + const uint8* u_buf, // rsi + const uint8* v_buf, // rdx + uint8* rgb_buf, // rcx + int width) { // r8 + asm( +"1:" + "movzb (%1),%%r10\n" + "lea 1(%1),%1\n" + "movzb (%2),%%r11\n" + "lea 1(%2),%2\n" + "movq 2048(%5,%%r10,8),%%xmm0\n" + "movzb (%0),%%r10\n" + "movq 4096(%5,%%r11,8),%%xmm1\n" + "movzb 0x1(%0),%%r11\n" + "paddsw %%xmm1,%%xmm0\n" + "movq (%5,%%r10,8),%%xmm2\n" + "lea 2(%0),%0\n" + "movq (%5,%%r11,8),%%xmm3\n" + "paddsw %%xmm0,%%xmm2\n" + "paddsw %%xmm0,%%xmm3\n" + "shufps $0x44,%%xmm3,%%xmm2\n" + "psraw $0x6,%%xmm2\n" + "packuswb %%xmm2,%%xmm2\n" + "movq %%xmm2,0x0(%3)\n" + "lea 8(%3),%3\n" + "sub $0x2,%4\n" + "ja 1b\n" + : + : "r"(y_buf), // %0 + "r"(u_buf), // %1 + "r"(v_buf), // %2 + "r"(rgb_buf), // %3 + "r"(width), // %4 + "r" (_kCoefficientsBgraY) // %5 + : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" +); +} + +void FastConvertYUVToABGRRow(const uint8* y_buf, // rdi + const uint8* u_buf, // rsi + const uint8* v_buf, // rdx + uint8* rgb_buf, // rcx + int width) { // r8 + asm( +"1:" + "movzb (%1),%%r10\n" + "lea 1(%1),%1\n" + "movzb (%2),%%r11\n" + "lea 1(%2),%2\n" + "movq 2048(%5,%%r10,8),%%xmm0\n" + "movzb (%0),%%r10\n" + "movq 4096(%5,%%r11,8),%%xmm1\n" + "movzb 0x1(%0),%%r11\n" + "paddsw %%xmm1,%%xmm0\n" + "movq (%5,%%r10,8),%%xmm2\n" + "lea 2(%0),%0\n" + "movq (%5,%%r11,8),%%xmm3\n" + "paddsw %%xmm0,%%xmm2\n" + "paddsw %%xmm0,%%xmm3\n" + "shufps $0x44,%%xmm3,%%xmm2\n" + "psraw $0x6,%%xmm2\n" + "packuswb %%xmm2,%%xmm2\n" + "movq %%xmm2,0x0(%3)\n" + "lea 8(%3),%3\n" + "sub $0x2,%4\n" + "ja 1b\n" + : + : "r"(y_buf), // %0 + "r"(u_buf), // %1 + "r"(v_buf), // %2 + "r"(rgb_buf), // %3 + "r"(width), // %4 + "r" (_kCoefficientsAbgrY) // %5 + : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" +); +} + void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi const uint8* u_buf, // rsi const uint8* v_buf, // rdx @@ -166,6 +244,98 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, "ret\n" ); +void FastConvertYUVToBGRARow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + asm( + ".text\n" +#if defined(OSX) || defined(IOS) + ".globl _FastConvertYUVToBGRARow\n" +"_FastConvertYUVToBGRARow:\n" +#else + ".global FastConvertYUVToBGRARow\n" +"FastConvertYUVToBGRARow:\n" +#endif + "pusha\n" + "mov 0x24(%esp),%edx\n" + "mov 0x28(%esp),%edi\n" + "mov 0x2c(%esp),%esi\n" + "mov 0x30(%esp),%ebp\n" + "mov 0x34(%esp),%ecx\n" + +"1:" + "movzbl (%edi),%eax\n" + "lea 1(%edi),%edi\n" + "movzbl (%esi),%ebx\n" + "lea 1(%esi),%esi\n" + "movq _kCoefficientsBgraY+2048(,%eax,8),%mm0\n" + "movzbl (%edx),%eax\n" + "paddsw _kCoefficientsBgraY+4096(,%ebx,8),%mm0\n" + "movzbl 0x1(%edx),%ebx\n" + "movq _kCoefficientsBgraY(,%eax,8),%mm1\n" + "lea 2(%edx),%edx\n" + "movq _kCoefficientsBgraY(,%ebx,8),%mm2\n" + "paddsw %mm0,%mm1\n" + "paddsw %mm0,%mm2\n" + "psraw $0x6,%mm1\n" + "psraw $0x6,%mm2\n" + "packuswb %mm2,%mm1\n" + "movntq %mm1,0x0(%ebp)\n" + "lea 8(%ebp),%ebp\n" + "sub $0x2,%ecx\n" + "ja 1b\n" + "popa\n" + "ret\n" +); + +void FastConvertYUVToABGRRow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + asm( + ".text\n" +#if defined(OSX) || defined(IOS) + ".globl _FastConvertYUVToABGRRow\n" +"_FastConvertYUVToABGRRow:\n" +#else + ".global FastConvertYUVToABGRRow\n" +"FastConvertYUVToABGRRow:\n" +#endif + "pusha\n" + "mov 0x24(%esp),%edx\n" + "mov 0x28(%esp),%edi\n" + "mov 0x2c(%esp),%esi\n" + "mov 0x30(%esp),%ebp\n" + "mov 0x34(%esp),%ecx\n" + +"1:" + "movzbl (%edi),%eax\n" + "lea 1(%edi),%edi\n" + "movzbl (%esi),%ebx\n" + "lea 1(%esi),%esi\n" + "movq _kCoefficientsAbgrY+2048(,%eax,8),%mm0\n" + "movzbl (%edx),%eax\n" + "paddsw _kCoefficientsAbgrY+4096(,%ebx,8),%mm0\n" + "movzbl 0x1(%edx),%ebx\n" + "movq _kCoefficientsAbgrY(,%eax,8),%mm1\n" + "lea 2(%edx),%edx\n" + "movq _kCoefficientsAbgrY(,%ebx,8),%mm2\n" + "paddsw %mm0,%mm1\n" + "paddsw %mm0,%mm2\n" + "psraw $0x6,%mm1\n" + "psraw $0x6,%mm2\n" + "packuswb %mm2,%mm1\n" + "movntq %mm1,0x0(%ebp)\n" + "lea 8(%ebp),%ebp\n" + "sub $0x2,%ecx\n" + "ja 1b\n" + "popa\n" + "ret\n" +); + void FastConvertYUV444ToRGB32Row(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -250,7 +420,11 @@ void FastConvertYToRGB32Row(const uint8* y_buf, static inline void YuvPixel(uint8 y, uint8 u, uint8 v, - uint8* rgb_buf) { + uint8* rgb_buf, + int ashift, + int rshift, + int gshift, + int bshift) { int b = _kCoefficientsRgbY[256+u][0]; int g = _kCoefficientsRgbY[256+u][1]; @@ -272,10 +446,10 @@ static inline void YuvPixel(uint8 y, r >>= 6; a >>= 6; - *reinterpret_cast(rgb_buf) = (packuswb(b)) | - (packuswb(g) << 8) | - (packuswb(r) << 16) | - (packuswb(a) << 24); + *reinterpret_cast(rgb_buf) = (packuswb(b) << bshift) | + (packuswb(g) << gshift) | + (packuswb(r) << rshift) | + (packuswb(a) << ashift); } void FastConvertYUVToRGB32Row(const uint8* y_buf, @@ -287,10 +461,46 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, uint8 u = u_buf[x >> 1]; uint8 v = v_buf[x >> 1]; uint8 y0 = y_buf[x]; - YuvPixel(y0, u, v, rgb_buf); + YuvPixel(y0, u, v, rgb_buf, 24, 16, 8, 0); if ((x + 1) < width) { uint8 y1 = y_buf[x + 1]; - YuvPixel(y1, u, v, rgb_buf + 4); + YuvPixel(y1, u, v, rgb_buf + 4, 24, 16, 8, 0); + } + rgb_buf += 8; // Advance 2 pixels. + } +} + +void FastConvertYUVToBGRARow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + for (int x = 0; x < width; x += 2) { + uint8 u = u_buf[x >> 1]; + uint8 v = v_buf[x >> 1]; + uint8 y0 = y_buf[x]; + YuvPixel(y0, u, v, rgb_buf, 0, 8, 16, 24); + if ((x + 1) < width) { + uint8 y1 = y_buf[x + 1]; + YuvPixel(y1, u, v, rgb_buf + 4, 0, 8, 16, 24); + } + rgb_buf += 8; // Advance 2 pixels. + } +} + +void FastConvertYUVToABGRRow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + for (int x = 0; x < width; x += 2) { + uint8 u = u_buf[x >> 1]; + uint8 v = v_buf[x >> 1]; + uint8 y0 = y_buf[x]; + YuvPixel(y0, u, v, rgb_buf, 24, 0, 8, 16); + if ((x + 1) < width) { + uint8 y1 = y_buf[x + 1]; + YuvPixel(y1, u, v, rgb_buf + 4, 24, 0, 8, 16); } rgb_buf += 8; // Advance 2 pixels. } diff --git a/source/row_table.cc b/source/row_table.cc index 71ee07308..7ce4a7ebd 100644 --- a/source/row_table.cc +++ b/source/row_table.cc @@ -12,6 +12,203 @@ extern "C" { +#define MAKETABLE(NAME) \ +SIMD_ALIGNED(const int16 NAME[256 * 3][4]) = {\ + RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), \ + RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), \ + RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), \ + RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), \ + RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), \ + RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), \ + RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), \ + RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), \ + RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), \ + RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), \ + RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B), \ + RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F), \ + RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33), \ + RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37), \ + RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B), \ + RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F), \ + RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43), \ + RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47), \ + RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B), \ + RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F), \ + RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53), \ + RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57), \ + RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B), \ + RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F), \ + RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63), \ + RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67), \ + RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B), \ + RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F), \ + RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73), \ + RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77), \ + RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B), \ + RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F), \ + RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83), \ + RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87), \ + RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B), \ + RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F), \ + RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93), \ + RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97), \ + RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B), \ + RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F), \ + RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3), \ + RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7), \ + RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB), \ + RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF), \ + RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3), \ + RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7), \ + RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB), \ + RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF), \ + RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3), \ + RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7), \ + RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB), \ + RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF), \ + RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3), \ + RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7), \ + RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB), \ + RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF), \ + RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), \ + RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), \ + RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), \ + RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), \ + RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), \ + RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), \ + RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), \ + RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), \ + RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), \ + RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), \ + RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), \ + RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), \ + RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), \ + RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), \ + RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), \ + RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), \ + RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), \ + RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), \ + RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B), \ + RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F), \ + RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33), \ + RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37), \ + RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B), \ + RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F), \ + RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43), \ + RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47), \ + RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B), \ + RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F), \ + RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53), \ + RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57), \ + RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B), \ + RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F), \ + RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63), \ + RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67), \ + RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B), \ + RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F), \ + RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73), \ + RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77), \ + RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B), \ + RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F), \ + RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83), \ + RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87), \ + RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B), \ + RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F), \ + RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93), \ + RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97), \ + RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B), \ + RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F), \ + RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3), \ + RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7), \ + RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB), \ + RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF), \ + RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3), \ + RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7), \ + RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB), \ + RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF), \ + RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3), \ + RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7), \ + RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB), \ + RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF), \ + RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3), \ + RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7), \ + RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB), \ + RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF), \ + RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), \ + RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), \ + RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), \ + RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), \ + RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), \ + RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), \ + RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), \ + RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), \ + RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), \ + RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), \ + RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), \ + RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), \ + RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), \ + RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), \ + RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), \ + RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), \ + RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), \ + RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), \ + RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B), \ + RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F), \ + RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33), \ + RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37), \ + RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B), \ + RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F), \ + RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43), \ + RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47), \ + RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B), \ + RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F), \ + RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53), \ + RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57), \ + RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B), \ + RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F), \ + RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63), \ + RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67), \ + RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B), \ + RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F), \ + RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73), \ + RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77), \ + RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B), \ + RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F), \ + RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83), \ + RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87), \ + RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B), \ + RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F), \ + RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93), \ + RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97), \ + RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B), \ + RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F), \ + RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3), \ + RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7), \ + RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB), \ + RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF), \ + RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3), \ + RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7), \ + RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB), \ + RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF), \ + RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3), \ + RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7), \ + RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB), \ + RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF), \ + RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3), \ + RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7), \ + RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB), \ + RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), \ + RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), \ + RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), \ + RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), \ + RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), \ + RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), \ + RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), \ + RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), \ + RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), \ +}; + +// ARGB table #define RGBY(i) { \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ static_cast(1.164 * 64 * (i - 16) + 0.5), \ @@ -34,212 +231,74 @@ extern "C" { } #ifdef OSX -SIMD_ALIGNED(const int16 kCoefficientsRgbY[256 * 3][4]) = { +MAKETABLE(kCoefficientsRgbY) #else -SIMD_ALIGNED(const int16 _kCoefficientsRgbY[256 * 3][4]) = { +MAKETABLE(_kCoefficientsRgbY) #endif - // Luminance table. - RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), - RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), - RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), - RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), - RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), - RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), - RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), - RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), - RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), - RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), - RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B), - RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F), - RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33), - RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37), - RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B), - RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F), - RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43), - RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47), - RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B), - RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F), - RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53), - RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57), - RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B), - RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F), - RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63), - RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67), - RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B), - RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F), - RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73), - RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77), - RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B), - RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F), - RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83), - RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87), - RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B), - RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F), - RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93), - RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97), - RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B), - RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F), - RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3), - RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7), - RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB), - RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF), - RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3), - RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7), - RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB), - RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF), - RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3), - RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7), - RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB), - RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF), - RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3), - RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7), - RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB), - RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF), - RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), - RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), - RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), - RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), - RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), - RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), - RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), - RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), - - // Chroma U table. - RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), - RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), - RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), - RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), - RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), - RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), - RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), - RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), - RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), - RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), - RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B), - RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F), - RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33), - RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37), - RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B), - RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F), - RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43), - RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47), - RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B), - RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F), - RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53), - RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57), - RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B), - RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F), - RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63), - RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67), - RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B), - RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F), - RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73), - RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77), - RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B), - RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F), - RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83), - RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87), - RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B), - RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F), - RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93), - RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97), - RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B), - RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F), - RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3), - RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7), - RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB), - RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF), - RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3), - RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7), - RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB), - RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF), - RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3), - RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7), - RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB), - RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF), - RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3), - RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7), - RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB), - RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF), - RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), - RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), - RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), - RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), - RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), - RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), - RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), - RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), - - // Chroma V table. - RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), - RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), - RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), - RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), - RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), - RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), - RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), - RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), - RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), - RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), - RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B), - RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F), - RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33), - RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37), - RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B), - RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F), - RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43), - RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47), - RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B), - RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F), - RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53), - RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57), - RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B), - RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F), - RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63), - RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67), - RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B), - RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F), - RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73), - RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77), - RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B), - RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F), - RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83), - RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87), - RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B), - RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F), - RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93), - RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97), - RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B), - RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F), - RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3), - RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7), - RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB), - RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF), - RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3), - RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7), - RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB), - RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF), - RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3), - RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7), - RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB), - RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF), - RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3), - RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7), - RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB), - RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), - RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), - RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), - RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), - RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), - RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), - RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), - RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), - RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), -}; - #undef RGBY #undef RGBU #undef RGBV +// BGRA table +#define RGBY(i) { \ + static_cast(256 * 64 - 1), \ + static_cast(1.164 * 64 * (i - 16) + 0.5), \ + static_cast(1.164 * 64 * (i - 16) + 0.5), \ + static_cast(1.164 * 64 * (i - 16) + 0.5) \ +} + +#define RGBU(i) { \ + 0, \ + 0, \ + static_cast(-0.391 * 64 * (i - 128) + 0.5), \ + static_cast(2.018 * 64 * (i - 128) + 0.5) \ +} + +#define RGBV(i) { \ + 0, \ + static_cast(1.596 * 64 * (i - 128) + 0.5), \ + static_cast(-0.813 * 64 * (i - 128) + 0.5), \ + 0 \ +} + +#ifdef OSX +MAKETABLE(kCoefficientsBgraY) +#else +MAKETABLE(_kCoefficientsBgraY) +#endif + + +#undef RGBY +#undef RGBU +#undef RGBV + +// ABGR table +#define RGBY(i) { \ + static_cast(1.164 * 64 * (i - 16) + 0.5), \ + static_cast(1.164 * 64 * (i - 16) + 0.5), \ + static_cast(1.164 * 64 * (i - 16) + 0.5), \ + static_cast(256 * 64 - 1) \ +} + +#define RGBU(i) { \ + 0, \ + static_cast(-0.391 * 64 * (i - 128) + 0.5), \ + static_cast(2.018 * 64 * (i - 128) + 0.5), \ + 0 \ +} + +#define RGBV(i) { \ + static_cast(1.596 * 64 * (i - 128) + 0.5), \ + static_cast(-0.813 * 64 * (i - 128) + 0.5), \ + 0, \ + 0 \ +} + +#ifdef OSX +MAKETABLE(kCoefficientsAbgrY) +#else +MAKETABLE(_kCoefficientsAbgrY) +#endif + } // extern "C" diff --git a/source/row_win.cc b/source/row_win.cc index e26cc9934..f127f4835 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -7,13 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ - - -#include "row.h" -#define kCoefficientsRgbY _kCoefficientsRgbY + 0 -#define kCoefficientsRgbU _kCoefficientsRgbY + 2048 -#define kCoefficientsRgbV _kCoefficientsRgbY + 4096 +#include "row.h" extern "C" { @@ -25,24 +20,106 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, int width) { __asm { pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width + mov edx, [esp + 32 + 4] + mov edi, [esp + 32 + 8] + mov esi, [esp + 32 + 12] + mov ebp, [esp + 32 + 16] + mov ecx, [esp + 32 + 20] convertloop : movzx eax, byte ptr [edi] lea edi, [edi + 1] movzx ebx, byte ptr [esi] lea esi, [esi + 1] - movq mm0, [kCoefficientsRgbU + 8 * eax] + movq mm0, [_kCoefficientsRgbY + 2048 + 8 * eax] movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] + paddsw mm0, [_kCoefficientsRgbY + 4096 + 8 * ebx] movzx ebx, byte ptr [edx + 1] - movq mm1, [kCoefficientsRgbY + 8 * eax] + movq mm1, [_kCoefficientsRgbY + 8 * eax] lea edx, [edx + 2] - movq mm2, [kCoefficientsRgbY + 8 * ebx] + movq mm2, [_kCoefficientsRgbY + 8 * ebx] + paddsw mm1, mm0 + paddsw mm2, mm0 + psraw mm1, 6 + psraw mm2, 6 + packuswb mm1, mm2 + movntq [ebp], mm1 + lea ebp, [ebp + 8] + sub ecx, 2 + ja convertloop + + popad + ret + } +} + +__declspec(naked) +void FastConvertYUVToBGRARow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + __asm { + pushad + mov edx, [esp + 32 + 4] + mov edi, [esp + 32 + 8] + mov esi, [esp + 32 + 12] + mov ebp, [esp + 32 + 16] + mov ecx, [esp + 32 + 20] + + convertloop : + movzx eax, byte ptr [edi] + lea edi, [edi + 1] + movzx ebx, byte ptr [esi] + lea esi, [esi + 1] + movq mm0, [_kCoefficientsBgraY + 2048 + 8 * eax] + movzx eax, byte ptr [edx] + paddsw mm0, [_kCoefficientsBgraY + 4096 + 8 * ebx] + movzx ebx, byte ptr [edx + 1] + movq mm1, [_kCoefficientsBgraY + 8 * eax] + lea edx, [edx + 2] + movq mm2, [_kCoefficientsBgraY + 8 * ebx] + paddsw mm1, mm0 + paddsw mm2, mm0 + psraw mm1, 6 + psraw mm2, 6 + packuswb mm1, mm2 + movntq [ebp], mm1 + lea ebp, [ebp + 8] + sub ecx, 2 + ja convertloop + + popad + ret + } +} + +__declspec(naked) +void FastConvertYUVToABGRRow(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + __asm { + pushad + mov edx, [esp + 32 + 4] + mov edi, [esp + 32 + 8] + mov esi, [esp + 32 + 12] + mov ebp, [esp + 32 + 16] + mov ecx, [esp + 32 + 20] + + convertloop : + movzx eax, byte ptr [edi] + lea edi, [edi + 1] + movzx ebx, byte ptr [esi] + lea esi, [esi + 1] + movq mm0, [_kCoefficientsAbgrY + 2048 + 8 * eax] + movzx eax, byte ptr [edx] + paddsw mm0, [_kCoefficientsAbgrY + 4096 + 8 * ebx] + movzx ebx, byte ptr [edx + 1] + movq mm1, [_kCoefficientsAbgrY + 8 * eax] + lea edx, [edx + 2] + movq mm2, [_kCoefficientsAbgrY + 8 * ebx] paddsw mm1, mm0 paddsw mm2, mm0 psraw mm1, 6 @@ -77,11 +154,11 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf, lea edi, [edi + 1] movzx ebx, byte ptr [esi] lea esi, [esi + 1] - movq mm0, [kCoefficientsRgbU + 8 * eax] + movq mm0, [_kCoefficientsRgbY + 2048 + 8 * eax] movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] + paddsw mm0, [_kCoefficientsRgbY + 4096 + 8 * ebx] lea edx, [edx + 1] - paddsw mm0, [kCoefficientsRgbY + 8 * eax] + paddsw mm0, [_kCoefficientsRgbY + 8 * eax] psraw mm0, 6 packuswb mm0, mm0 movd [ebp], mm0 @@ -106,10 +183,10 @@ void FastConvertYToRGB32Row(const uint8* y_buf, convertloop : movzx ebx, byte ptr [eax] - movq mm0, [kCoefficientsRgbY + 8 * ebx] + movq mm0, [_kCoefficientsRgbY + 8 * ebx] psraw mm0, 6 movzx ebx, byte ptr [eax + 1] - movq mm1, [kCoefficientsRgbY + 8 * ebx] + movq mm1, [_kCoefficientsRgbY + 8 * ebx] psraw mm1, 6 packuswb mm0, mm1 lea eax, [eax + 2] diff --git a/source/scale.cc b/source/scale.cc index 2efcb8579..f056de50b 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -46,8 +46,12 @@ namespace libyuv { // NOT the optimized versions. Useful for debugging and // when comparing the quality of the resulting YUV planes // as produced by the optimized and non-optimized versions. -bool YuvScaler::use_reference_impl_ = false; +bool use_reference_impl_ = false; + +void SetUseReferenceImpl(bool use) { + use_reference_impl_ = use; +} /** * NEON downscalers with interpolation. @@ -2790,13 +2794,13 @@ static void ScalePlane(const uint8 *in, int32 istride, * suitable for handling the desired resolutions. * */ -bool YuvScaler::Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV, - int32 istrideY, int32 istrideU, int32 istrideV, - int32 iwidth, int32 iheight, - uint8 *outY, uint8 *outU, uint8 *outV, - int32 ostrideY, int32 ostrideU, int32 ostrideV, - int32 owidth, int32 oheight, - bool interpolate) { +bool Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV, + int32 istrideY, int32 istrideU, int32 istrideV, + int32 iwidth, int32 iheight, + uint8 *outY, uint8 *outU, uint8 *outV, + int32 ostrideY, int32 ostrideU, int32 ostrideV, + int32 owidth, int32 oheight, + bool interpolate) { if (!inY || !inU || !inV || iwidth <= 0 || iheight <= 0 || !outY || !outU || !outV || owidth <= 0 || oheight <= 0) { return false; @@ -2818,9 +2822,9 @@ bool YuvScaler::Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV, return true; } -bool YuvScaler::Scale(const uint8 *in, int32 iwidth, int32 iheight, - uint8 *out, int32 owidth, int32 oheight, int32 ooffset, - bool interpolate) { +bool Scale(const uint8 *in, int32 iwidth, int32 iheight, + uint8 *out, int32 owidth, int32 oheight, int32 ooffset, + bool interpolate) { if (!in || iwidth <= 0 || iheight <= 0 || !out || owidth <= 0 || oheight <= 0 || ooffset < 0 || ooffset >= oheight) {