diff --git a/BUILD.gn b/BUILD.gn index 2c9c3e6e4..a72ff0655 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -27,6 +27,10 @@ config("libyuv_config") { if (is_android && current_cpu != "arm64") { ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] } + + if (!libyuv_use_neon) { + defines = [ "LIBYUV_DISABLE_NEON" ] + } } # This target is built when no specific target is specified on the command line. diff --git a/README.chromium b/README.chromium index b6d85b739..8461a1218 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1824 +Version: 1828 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 5c2954f4c..46d371593 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -238,6 +238,23 @@ int I010ToI420(const uint16_t* src_y, int width, int height); +#define H210ToH420 I210ToI420 +LIBYUV_API +int I210ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + #define H210ToH422 I210ToI422 LIBYUV_API int I210ToI422(const uint16_t* src_y, diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 15c7d457d..1ef2256bf 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -991,6 +991,21 @@ int InterpolatePlane(const uint8_t* src0, int height, int interpolation); +// Interpolate between two images using specified amount of interpolation +// (0 to 255) and store to destination. +// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 +// and 255 means 1/256 src0 and 255/256 src1. +LIBYUV_API +int InterpolatePlane_16(const uint16_t* src0, + int src_stride0, // measured in 16 bit pixels + const uint16_t* src1, + int src_stride1, + uint16_t* dst, + int dst_stride, + int width, + int height, + int interpolation); +
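Note (annotation, not part of the patch): a minimal usage sketch for the InterpolatePlane_16 declaration above, blending two 10-bit planes. Buffer names and dimensions are hypothetical; strides are in 16 bit pixels, as the header comment notes.

  #include "libyuv/planar_functions.h"

  enum { kWidth = 320, kHeight = 180 };
  static uint16_t src0[kWidth * kHeight];  // e.g. frame N, 10-bit samples
  static uint16_t src1[kWidth * kHeight];  // e.g. frame N+1
  static uint16_t dst[kWidth * kHeight];

  void BlendHalfway() {
    // interpolation = 128 out of 256 selects an even 50/50 blend.
    libyuv::InterpolatePlane_16(src0, kWidth, src1, kWidth, dst, kWidth,
                                kWidth, kHeight, 128);
  }

// Interpolate between two ARGB images using specified amount of interpolation // Internally calls InterpolatePlane with width * 4 (bpp).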
LIBYUV_API diff --git a/include/libyuv/row.h b/include/libyuv/row.h index e51155c08..f15b58fab 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -111,6 +111,7 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 +#define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 @@ -123,10 +124,10 @@ extern "C" { #define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 -#define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RGB565TOARGBROW_SSE2 #define HAS_RAWTOYROW_SSSE3 +#define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 +#define HAS_RGB565TOARGBROW_SSE2 #define HAS_RGBATOYROW_SSSE3 #if !defined(LIBYUV_BIT_EXACT) #define HAS_RGB24TOYJROW_SSSE3 @@ -169,7 +170,6 @@ extern "C" { #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -#define HAS_INTERPOLATEROW_SSSE3 #define HAS_RGBCOLORTABLEROW_X86 #define HAS_SOBELROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2 @@ -410,8 +410,10 @@ extern "C" { // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) +#define HAS_AB64TOARGBROW_NEON #define HAS_ABGRTOUVROW_NEON #define HAS_ABGRTOYROW_NEON +#define HAS_AR64TOARGBROW_NEON #define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB1555TOUVROW_NEON #define HAS_ARGB1555TOYROW_NEON @@ -420,16 +422,14 @@ extern "C" { #define HAS_ARGB4444TOYROW_NEON #define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON +#define HAS_ARGBTOAB64ROW_NEON +#define HAS_ARGBTOAR64ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTORAWROW_NEON #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOAR64ROW_NEON -#define HAS_ARGBTOAB64ROW_NEON -#define HAS_AR64TOARGBROW_NEON -#define HAS_AB64TOARGBROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON @@ -449,7 +449,6 @@ extern "C" { #define HAS_HALFFLOATROW_NEON #define HAS_HALFMERGEUVROW_NEON #define HAS_I400TOARGBROW_NEON -#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON @@ -459,20 +458,23 @@ extern "C" { #define HAS_I422TORGBAROW_NEON #define HAS_I422TOUYVYROW_NEON #define HAS_I422TOYUY2ROW_NEON +#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_INTERPOLATEROW_16_NEON +#define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON #define HAS_MERGEAR64ROW_NEON #define HAS_MERGEARGB16TO8ROW_NEON #define HAS_MERGEARGBROW_NEON +#define HAS_MERGEUVROW_16_NEON +#define HAS_MERGEUVROW_NEON #define HAS_MERGEXR30ROW_NEON #define HAS_MERGEXR64ROW_NEON #define HAS_MERGEXRGB16TO8ROW_NEON #define HAS_MERGEXRGBROW_NEON -#define HAS_MERGEUVROW_NEON -#define HAS_MERGEUVROW_16_NEON #define HAS_MIRRORROW_NEON -#define HAS_MIRRORUVROW_NEON #define HAS_MIRRORSPLITUVROW_NEON +#define HAS_MIRRORUVROW_NEON #define HAS_MULTIPLYROW_16_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV12TORGB24ROW_NEON @@ -483,13 +485,13 @@ extern "C" { #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTORGBAROW_NEON -#define HAS_RAWTOUVROW_NEON #define HAS_RAWTOUVJROW_NEON +#define HAS_RAWTOUVROW_NEON #define HAS_RAWTOYJROW_NEON #define HAS_RAWTOYROW_NEON #define 
HAS_RGB24TOARGBROW_NEON -#define HAS_RGB24TOUVROW_NEON #define HAS_RGB24TOUVJROW_NEON +#define HAS_RGB24TOUVROW_NEON #define HAS_RGB24TOYJROW_NEON #define HAS_RGB24TOYROW_NEON #define HAS_RGB565TOARGBROW_NEON @@ -500,10 +502,10 @@ extern "C" { #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON #define HAS_SPLITARGBROW_NEON -#define HAS_SPLITXRGBROW_NEON #define HAS_SPLITRGBROW_NEON -#define HAS_SPLITUVROW_NEON #define HAS_SPLITUVROW_16_NEON +#define HAS_SPLITUVROW_NEON +#define HAS_SPLITXRGBROW_NEON #define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON @@ -528,7 +530,6 @@ extern "C" { #define HAS_ARGBSHADEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON -#define HAS_INTERPOLATEROW_NEON #define HAS_SOBELROW_NEON #define HAS_SOBELTOPLANEROW_NEON #define HAS_SOBELXROW_NEON @@ -5203,6 +5204,23 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_16_Any_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); + +void InterpolateRow_16To8_C(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); // Sobel images. void SobelXRow_C(const uint8_t* src_y0, diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index cc1c90619..5c474b0ce 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -200,6 +200,20 @@ void ScalePlaneVertical_16(int src_height, int wpp, enum FilterMode filtering); +void ScalePlaneVertical_16To8(int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int wpp, + int scale, + enum FilterMode filtering); + // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8afef0ed9..8f09ccd2b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1824 +#define LIBYUV_VERSION 1828 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 502f002d6..162546e5b 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -15,8 +15,9 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/row.h" -#include "libyuv/scale.h" // For ScalePlane() -#include "libyuv/scale_uv.h" // For UVScale() +#include "libyuv/scale.h" // For ScalePlane() +#include "libyuv/scale_row.h" // For FixedDiv +#include "libyuv/scale_uv.h" // For UVScale() #ifdef __cplusplus namespace libyuv { @@ -220,6 +221,55 @@ int I010ToI420(const uint16_t* src_y, 1, 10); } +LIBYUV_API +int I210ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + const int depth = 10; + const int scale = 1 << (24 - depth); + + if (width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + { + const int uv_width = SUBSAMPLE(width, 1, 1); + const int uv_height = SUBSAMPLE(height, 1, 1); + const int dy = FixedDiv(height, uv_height); + + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, + height); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u, + dst_stride_u, src_u, dst_u, 0, 32768, dy, + /*wpp=*/1, scale, kFilterBilinear); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v, + dst_stride_v, src_v, dst_v, 0, 32768, dy, + /*wpp=*/1, scale, kFilterBilinear); + } + return 0; +} + LIBYUV_API int I210ToI422(const uint16_t* src_y, int src_stride_y, diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 9fce8d204..56fe60e49 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -20,7 +20,7 @@ #endif // For ArmCpuCaps() but unittested on all platforms -#include <stdio.h> +#include <stdio.h> // For fopen() #include <string.h> #ifdef __cplusplus diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 141db6efb..b5344862d 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -35,7 +35,7 @@ void CopyPlane(const uint8_t* src_y, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; - if (width == 0 || height == 0) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. @@ -84,8 +84,6 @@ void CopyPlane(const uint8_t* src_y, } } -// TODO(fbarchard): Consider support for negative height. -// TODO(fbarchard): Consider stride measured in bytes. LIBYUV_API void CopyPlane_16(const uint16_t* src_y, int src_stride_y, @@ -93,36 +91,8 @@ void CopyPlane_16(const uint16_t* src_y, int dst_stride_y, int width, int height) { - int y; - void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C; - // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y = dst_stride_y = 0; - } -#if defined(HAS_COPYROW_16_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_16_SSE2; - } -#endif -#if defined(HAS_COPYROW_16_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_16_ERMS; - } -#endif -#if defined(HAS_COPYROW_16_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_16_NEON; - } -#endif - - // Copy plane - for (y = 0; y < height; ++y) { - CopyRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; - } + CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y, + dst_stride_y * 2, width * 2, height); } // Convert a plane of 16 bit data to 8 bit @@ -138,6 +108,9 @@ void Convert16To8Plane(const uint16_t* src_y, void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -196,6 +169,9 @@ void Convert8To16Plane(const uint8_t* src_y, void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; + if (width <= 0 || height == 0) { + return; + }
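Note (annotation, not part of the patch): the CopyPlane_16 rewrite above reuses the optimized 8-bit CopyPlane by reinterpreting each uint16_t sample as two bytes, so the SIMD copy paths are shared rather than duplicated per depth. A plain-C model of the idea, with a hypothetical wrapper name:

  void CopyPlane16ViaBytes(const uint16_t* src, int src_stride,  // strides in pixels
                           uint16_t* dst, int dst_stride,
                           int width, int height) {
    // Width and strides double because CopyPlane counts bytes, not samples.
    libyuv::CopyPlane((const uint8_t*)src, src_stride * 2, (uint8_t*)dst,
                      dst_stride * 2, width * 2, height);
  }

// Negative height means invert the image.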
if (height < 0) { height = -height; @@ -470,6 +446,9 @@ void SplitUVPlane(const uint8_t* src_uv, int y; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -547,6 +526,9 @@ void MergeUVPlane(const uint8_t* src_u, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -626,6 +608,9 @@ void SplitUVPlane_16(const uint16_t* src_uv, void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u, uint16_t* dst_v, int depth, int width) = SplitUVRow_16_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -683,6 +668,9 @@ void MergeUVPlane_16(const uint16_t* src_u, MergeUVRow_16_C; assert(depth >= 8); assert(depth <= 16); + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -735,6 +723,9 @@ void ConvertToMSBPlane_16(const uint16_t* src_y, int scale = 1 << (16 - depth); void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = MultiplyRow_16_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -785,6 +776,9 @@ void ConvertToLSBPlane_16(const uint16_t* src_y, int scale = 1 << depth; void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = DivideRow_16_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -833,6 +827,9 @@ void SwapUVPlane(const uint8_t* src_uv, int y; void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = SwapUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -936,6 +933,9 @@ void DetilePlane(const uint8_t* src_y, assert(tile_height > 0); assert(src_stride_y > 0); + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -991,6 +991,9 @@ void DetileSplitUVPlane(const uint8_t* src_uv, assert(tile_height > 0); assert(src_stride_uv > 0); + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -1046,6 +1049,9 @@ void SplitRGBPlane(const uint8_t* src_rgb, int y; void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitRGBRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -1105,6 +1111,9 @@ void MergeRGBPlane(const uint8_t* src_r, void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; + if (width <= 0 || height == 0) { + return; + } // Coalesce rows. // Negative height means invert the image. 
if (height < 0) { @@ -3059,6 +3068,10 @@ void SetPlane(uint8_t* dst_y, uint32_t value) { int y; void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; + + if (width <= 0 || height == 0) { + return; + } if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -4005,6 +4018,86 @@ int InterpolatePlane(const uint8_t* src0, return 0; } +// Interpolate 2 planes by specified amount (0 to 255). +LIBYUV_API +int InterpolatePlane_16(const uint16_t* src0, + int src_stride0, + const uint16_t* src1, + int src_stride1, + uint16_t* dst, + int dst_stride, + int width, + int height, + int interpolation) { + int y; + void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_16_C; + if (!src0 || !src1 || !dst || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst = dst + (height - 1) * dst_stride; + dst_stride = -dst_stride; + } + // Coalesce rows. + if (src_stride0 == width && src_stride1 == width && dst_stride == width) { + width *= height; + height = 1; + src_stride0 = src_stride1 = dst_stride = 0; + } +#if defined(HAS_INTERPOLATEROW_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow_16 = InterpolateRow_16_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + InterpolateRow_16 = InterpolateRow_16_SSSE3; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + InterpolateRow_16 = InterpolateRow_16_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + InterpolateRow_16 = InterpolateRow_16_Any_NEON; + if (IS_ALIGNED(width, 8)) { + InterpolateRow_16 = InterpolateRow_16_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow_16 = InterpolateRow_16_Any_MSA; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_MSA; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow_16 = InterpolateRow_16_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_LSX; + } + } +#endif + + for (y = 0; y < height; ++y) { + InterpolateRow_16(dst, src0, src1 - src0, width, interpolation); + src0 += src_stride0; + src1 += src_stride1; + dst += dst_stride; + } + return 0; +} + // Interpolate 2 ARGB images by specified amount (0 to 255). LIBYUV_API int ARGBInterpolate(const uint8_t* src_argb0, diff --git a/source/row_any.cc b/source/row_any.cc index 089e518af..3e95b2df4 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -1625,37 +1625,42 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) #undef ANY11C // Any 1 to 1 interpolate. Takes 2 rows of source via stride. 
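Note (annotation, not part of the patch): the hunk below parameterizes ANY11I over the element type T, so a single macro generates the any-width wrappers for both the uint8_t rows and the new uint16_t rows. For reference, the new ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, 1, 1, 7) instantiation expands to roughly this (modulo formatting):

  void InterpolateRow_16_Any_NEON(uint16_t* dst_ptr, const uint16_t* src_ptr,
                                  ptrdiff_t src_stride, int width,
                                  int source_y_fraction) {
    SIMD_ALIGNED(uint16_t temp[64 * 3]);         // 2 padded source rows + 1 output row
    memset(temp, 0, 64 * 2 * sizeof(uint16_t));  // initialize for msan
    int r = width & 7;   // remainder pixels beyond a multiple of 8
    int n = width & ~7;  // largest multiple of the SIMD width (8)
    if (n > 0) {
      InterpolateRow_16_NEON(dst_ptr, src_ptr, src_stride, n, source_y_fraction);
    }
    // Copy the tails of both source rows into the padded buffer, run one
    // full-width SIMD iteration there, then copy back only the valid pixels.
    memcpy(temp, src_ptr + n, r * sizeof(uint16_t));
    memcpy(temp + 64, src_ptr + src_stride + n, r * sizeof(uint16_t));
    InterpolateRow_16_NEON(temp + 128, temp, 64, 8, source_y_fraction);
    memcpy(dst_ptr + n, temp + 128, r * sizeof(uint16_t));
  }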
-#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ - void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \ - int width, int source_y_fraction) { \ - SIMD_ALIGNED(uint8_t temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \ - ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY11I(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ + void NAMEANY(T* dst_ptr, const T* src_ptr, ptrdiff_t src_stride, int width, \ + int source_y_fraction) { \ + SIMD_ALIGNED(T temp[64 * 3]); \ + memset(temp, 0, 64 * 2 * sizeof(T)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP * sizeof(T)); \ + memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP * sizeof(T)); \ + ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP * sizeof(T)); \ } #ifdef HAS_INTERPOLATEROW_AVX2 -ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) +ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_SSSE3 -ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) +ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, uint8_t, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_NEON -ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) +ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_MSA -ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) +ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_LSX -ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, 1, 1, 31) +ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, 1, 1, 31) #endif + +#ifdef HAS_INTERPOLATEROW_16_NEON +ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, 1, 1, 7) +#endif + #undef ANY11I // Any 1 to 1 mirror. diff --git a/source/row_common.cc b/source/row_common.cc index 8cf826ec5..3bfc56180 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -11,7 +11,6 @@ #include "libyuv/row.h" #include <assert.h> -#include <stdio.h> #include <string.h> // For memcpy and memset. #include "libyuv/basic_types.h" @@ -3402,6 +3401,18 @@ static void HalfRow_16_C(const uint16_t* src_uv, } } +static void HalfRow_16To8_C(const uint16_t* src_uv, + ptrdiff_t src_uv_stride, + uint8_t* dst_uv, + int scale, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_uv[x] = clamp255( + (((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1) * scale) >> 16); + } +} + // C version 2x2 -> 2x1. void InterpolateRow_C(uint8_t* dst_ptr, const uint8_t* src_ptr, @@ -3435,6 +3446,51 @@ void InterpolateRow_C(uint8_t* dst_ptr, } }
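Note (annotation, not part of the patch): the InterpolateRow_16To8_C function added below does two things per pixel: it blends the two source rows with 8-bit fractions, then rescales the still lsb-justified 16-bit result into an 8-bit msb value via the caller-supplied scale. A worked example with 10-bit input, where scale = 1 << (24 - 10) = 16384:

  // blended = (v0 * (256 - f) + v1 * f) >> 8      // still in 10-bit range
  // out8    = clamp255((blended * 16384) >> 16)   // 10-bit -> 8-bit
  // e.g. v0 = 1023, v1 = 1023, f = 64:
  //   blended = (1023 * 192 + 1023 * 64) >> 8 = 1023
  //   out8    = (1023 * 16384) >> 16 = 255

+// C version 2x2 16 bit -> 2x1 8 bit.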
+// Use scale to convert lsb formats to msb, depending on how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void InterpolateRow_16To8_C(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16_t* src_ptr1 = src_ptr + src_stride; + int x; + if (source_y_fraction == 0) { + Convert16To8Row_C(src_ptr, dst_ptr, scale, width); + return; + } + if (source_y_fraction == 128) { + HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width); + return; + } + for (x = 0; x < width - 1; x += 2) { + dst_ptr[0] = clamp255( + (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) * + scale) >> + 16); + dst_ptr[1] = clamp255( + (((src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8) * + scale) >> + 16); + src_ptr += 2; + src_ptr1 += 2; + dst_ptr += 2; + } + if (width & 1) { + dst_ptr[0] = clamp255( + (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) * + scale) >> + 16); + } +} + void InterpolateRow_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, diff --git a/source/row_neon.cc b/source/row_neon.cc index 8ba71d07e..297ccceb6 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -10,8 +10,6 @@ #include "libyuv/row.h" -#include <stdio.h> - #ifdef __cplusplus namespace libyuv { extern "C" { @@ -21,6 +19,8 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) +// d8-d15, r4-r11, r14(lr) need to be preserved if used. r13(sp), r15(pc) are reserved. + // q0: Y uint16x8_t // d2: U uint8x8_t // d3: V uint8x8_t @@ -2715,6 +2715,66 @@ void InterpolateRow_NEON(uint8_t* dst_ptr, : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"); } +// Bilinear filter 8x2 -> 8x1 +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16_t* src_ptr1 = src_ptr + src_stride; + + asm volatile( + "cmp %4, #0 \n" + "beq 100f \n" + "cmp %4, #128 \n" + "beq 50f \n" + + "vdup.16 d17, %4 \n" + "vdup.16 d16, %5 \n" + // General purpose row blend. + "1: \n" + "vld1.16 {q0}, [%1]! \n" + "vld1.16 {q1}, [%2]! \n" + "subs %3, %3, #8 \n" + "vmull.u16 q2, d0, d16 \n" + "vmull.u16 q3, d1, d16 \n" + "vmlal.u16 q2, d2, d17 \n" + "vmlal.u16 q3, d3, d17 \n" + "vrshrn.u32 d0, q2, #8 \n" + "vrshrn.u32 d1, q3, #8 \n" + "vst1.16 {q0}, [%0]! \n" + "bgt 1b \n" + "b 99f \n" + + // Blend 50 / 50. + "50: \n" + "vld1.16 {q0}, [%1]! \n" + "vld1.16 {q1}, [%2]! \n" + "subs %3, %3, #8 \n" + "vrhadd.u16 q0, q1 \n" + "vst1.16 {q0}, [%0]! \n" + "bgt 50b \n" + "b 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "vld1.16 {q0}, [%1]! \n" + "subs %3, %3, #8 \n" + "vst1.16 {q0}, [%0]! \n" + "bgt 100b \n" + + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_ptr1), // %2 + "+r"(dst_width) // %3 + : "r"(y1_fraction), // %4 + "r"(y0_fraction) // %5 + : "cc", "memory", "q0", "q1", "q2", "q3", "q8"); +} + // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, @@ -3666,7 +3726,7 @@ void Convert16To8Row_NEON(const uint16_t* src_y, "vqdmulh.s16 q1, q1, q2 \n" "vqshrn.u16 d0, q0, #1 \n" "vqshrn.u16 d1, q1, #1 \n" - "vst1.16 {q0}, [%1]! \n" + "vst1.8 {q0}, [%1]! 
\n" "subs %3, %3, #16 \n" // 16 src pixels per loop "bgt 1b \n" : "+r"(src_y), // %0 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 8d43d5940..6135014b7 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2966,6 +2966,71 @@ void InterpolateRow_NEON(uint8_t* dst_ptr, : "cc", "memory", "v0", "v1", "v3", "v4", "v5"); } +// Bilinear filter 8x2 -> 8x1 +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16_t* src_ptr1 = src_ptr + src_stride; + + asm volatile( + "cmp %w4, #0 \n" + "b.eq 100f \n" + "cmp %w4, #128 \n" + "b.eq 50f \n" + + "dup v5.8h, %w4 \n" + "dup v4.8h, %w5 \n" + // General purpose row blend. + "1: \n" + "ld1 {v0.8h}, [%1], #16 \n" + "ld1 {v1.8h}, [%2], #16 \n" + "subs %w3, %w3, #8 \n" + "umull v2.4s, v0.4h, v4.4h \n" + "prfm pldl1keep, [%1, 448] \n" + "umull2 v3.4s, v0.8h, v4.8h \n" + "prfm pldl1keep, [%2, 448] \n" + "umlal v2.4s, v1.4h, v5.4h \n" + "umlal2 v3.4s, v1.8h, v5.8h \n" + "rshrn v0.4h, v2.4s, #8 \n" + "rshrn2 v0.8h, v3.4s, #8 \n" + "st1 {v0.8h}, [%0], #16 \n" + "b.gt 1b \n" + "b 99f \n" + + // Blend 50 / 50. + "50: \n" + "ld1 {v0.8h}, [%1], #16 \n" + "ld1 {v1.8h}, [%2], #16 \n" + "subs %w3, %w3, #8 \n" + "prfm pldl1keep, [%1, 448] \n" + "urhadd v0.8h, v0.8h, v1.8h \n" + "prfm pldl1keep, [%2, 448] \n" + "st1 {v0.8h}, [%0], #16 \n" + "b.gt 50b \n" + "b 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "ld1 {v0.8h}, [%1], #16 \n" + "subs %w3, %w3, #8 \n" + "prfm pldl1keep, [%1, 448] \n" + "st1 {v0.8h}, [%0], #16 \n" + "b.gt 100b \n" + + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_ptr1), // %2 + "+r"(dst_width) // %3 + : "r"(y1_fraction), // %4 + "r"(y0_fraction) // %5 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"); +} + // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, diff --git a/source/scale.cc b/source/scale.cc index ad573ef6a..ac009310f 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -29,6 +29,7 @@ static __inline int Abs(int v) { } #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) +#define CENTERSTART(dx, s) (dx < 0) ? 
-((-dx >> 1) + s) : ((dx >> 1) + s) // Scale plane, 1/2 // This is an optimized version for scaling down a plane to 1/2 of @@ -1154,7 +1155,7 @@ void ScalePlaneBilinearDown_16(int src_width, #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_Any_16_SSE2; + InterpolateRow = InterpolateRow_16_Any_SSE2; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } @@ -1162,7 +1163,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_16_SSSE3; + InterpolateRow = InterpolateRow_16_Any_SSSE3; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } @@ -1170,7 +1171,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_16_AVX2; + InterpolateRow = InterpolateRow_16_Any_AVX2; if (IS_ALIGNED(src_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } @@ -1178,7 +1179,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_16_NEON; + InterpolateRow = InterpolateRow_16_Any_NEON; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } @@ -1706,7 +1707,7 @@ void ScalePlaneBilinearUp_16(int src_width, #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_Any_16_SSE2; + InterpolateRow = InterpolateRow_16_Any_SSE2; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } @@ -1714,7 +1715,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_16_SSSE3; + InterpolateRow = InterpolateRow_16_Any_SSSE3; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } @@ -1722,7 +1723,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_16_AVX2; + InterpolateRow = InterpolateRow_16_Any_AVX2; if (IS_ALIGNED(dst_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } @@ -1730,7 +1731,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_16_NEON; + InterpolateRow = InterpolateRow_16_Any_NEON; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } @@ -1886,7 +1887,6 @@ static void ScalePlaneSimple_16(int src_width, // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. - LIBYUV_API void ScalePlane(const uint8_t* src, int src_stride, @@ -1916,10 +1916,19 @@ void ScalePlane(const uint8_t* src, return; } if (dst_width == src_width && filtering != kFilterBox) { - int dy = FixedDiv(src_height, dst_height); + int dy = 0; + int y = 0; + // When scaling down, use the center 2 rows to filter. + // When scaling up, last row of destination uses the last 2 source rows. + if (dst_height <= src_height) { + dy = FixedDiv(src_height, dst_height); + y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. + } else if (src_height > 1 && dst_height > 1) { + dy = FixedDiv1(src_height, dst_height); + } // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst, 0, 0, dy, /*bpp=*/1, filtering); + dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { @@ -2010,10 +2019,22 @@ void ScalePlane_16(const uint16_t* src, return; } if (dst_width == src_width && filtering != kFilterBox) { - int dy = FixedDiv(src_height, dst_height); + int dy = 0; + int y = 0; + // When scaling down, use the center 2 rows to filter. + // When scaling up, last row of destination uses the last 2 source rows. + if (dst_height <= src_height) { + dy = FixedDiv(src_height, dst_height); + y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. + // When scaling up, the last row of the destination uses the last + // source row. Skip FixedDiv1 when either dimension is 1 to avoid a + // divide by zero; dy stays 0 and no vertical scaling is done. + } else if (src_height > 1 && dst_height > 1) { + dy = FixedDiv1(src_height, dst_height); + } // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst, 0, 0, dy, /*bpp=*/1, filtering); + dst_stride, src, dst, 0, y, dy, /*wpp=*/1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { diff --git a/source/scale_common.cc b/source/scale_common.cc index d54ab9423..e0c821c27 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -1533,6 +1533,7 @@ void ScalePlaneVertical(int src_height, y += dy; } } + void ScalePlaneVertical_16(int src_height, int dst_width, int dst_height, @@ -1543,7 +1544,7 @@ void ScalePlaneVertical_16(int src_height, int x, int y, int dy, - int wpp, + int wpp, /* words per pixel, normally 1 */ enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; @@ -1559,32 +1560,32 @@ void ScalePlaneVertical_16(int src_height, src_argb += (x >> 16) * wpp; #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_Any_16_SSE2; - if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_Any_SSE2; + if (IS_ALIGNED(dst_width_words, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } } #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_16_SSSE3; - if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_Any_SSSE3; + if (IS_ALIGNED(dst_width_words, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_16_AVX2; - if (IS_ALIGNED(dst_width_bytes, 32)) { + InterpolateRow = InterpolateRow_16_Any_AVX2; + if (IS_ALIGNED(dst_width_words, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_16_NEON; - if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_Any_NEON; + if (IS_ALIGNED(dst_width_words, 8)) { InterpolateRow = InterpolateRow_16_NEON; } } @@ -1604,6 +1605,48 @@ void ScalePlaneVertical_16(int src_height, } } +void ScalePlaneVertical_16To8(int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int wpp, /* words per pixel, normally 1 */ + int scale, + enum FilterMode filtering) {
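Note (annotation, not part of the patch): vertical position and step are 16.16 fixed point throughout these scalers: yi = y >> 16 picks the top source row and yf = (y >> 8) & 255 is the blend fraction toward the next row. A worked example for ScalePlaneVertical_16To8, scaling 4 source rows to 2 with the centered start used by I210ToI420:

  // dy = FixedDiv(4, 2) = 131072 (2.0); y starts at dy / 2 - 32768 = 32768 (0.5)
  // dst row 0: yi = 32768 >> 16 = 0, yf = 128 -> 50/50 blend of source rows 0 and 1
  // dst row 1: y += dy -> 163840, yi = 2, yf = 128 -> 50/50 blend of rows 2 and 3

+ // TODO(fbarchard): Allow higher wpp.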
+ int dst_width_words = dst_width * wpp; + // TODO(https://crbug.com/libyuv/931): Add NEON and AVX2 versions. + void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb, + ptrdiff_t src_stride, int scale, int dst_width, + int source_y_fraction) = InterpolateRow_16To8_C; + const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; + int j; + assert(wpp >= 1 && wpp <= 2); + assert(src_height != 0); + assert(dst_width > 0); + assert(dst_height > 0); + src_argb += (x >> 16) * wpp; + + for (j = 0; j < dst_height; ++j) { + int yi; + int yf; + if (y > max_y) { + y = max_y; + } + yi = y >> 16; + yf = filtering ? ((y >> 8) & 255) : 0; + InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride, + scale, dst_width_words, yf); + dst_argb += dst_stride; + y += dy; + } +} + // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, @@ -1653,7 +1696,7 @@ int FixedDiv_C(int num, int div) { return (int)(((int64_t)(num) << 16) / div); } -// Divide num by div and return as 16.16 fixed point result. +// Divide num - 1 by div - 1 and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div) { return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); } @@ -1696,14 +1739,14 @@ void ScaleSlope(int src_width, if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_width > 1) { + } else if (src_width > 1 && dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } if (dst_height <= src_height) { *dy = FixedDiv(src_height, dst_height); *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_height > 1) { + } else if (src_height > 1 && dst_height > 1) { *dy = FixedDiv1(src_height, dst_height); *y = 0; } @@ -1712,7 +1755,7 @@ void ScaleSlope(int src_width, if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. 
- } else if (dst_width > 1) { + } else if (src_width > 1 && dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 185c5aa44..c2d952200 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -178,6 +178,7 @@ TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) @@ -2270,7 +2271,8 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { free_aligned_buffer_page_end(dst_vu); } -TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { +// TODO(fbarchard): Improve test to compare against I422, not checksum +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); @@ -2294,13 +2296,13 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 3543430771u); + EXPECT_EQ(dst_uv_hash, 493520167u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); } -TEST_F(LibYUVConvertTest, TestMJPGToNV12_422) { +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); @@ -2327,7 +2329,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_422) { half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 3543430771u); + EXPECT_EQ(dst_vu_hash, 493520167u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 8f5a33cb1..42166d0d9 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1080,6 +1080,87 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { } } +TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { + SIMD_ALIGNED(uint16_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint16_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint16_t interpolate_pixels[1280]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0] = 16u; + orig_pixels_0[1] = 32u; + orig_pixels_0[2] = 64u; + orig_pixels_0[3] = 128u; + orig_pixels_0[4] = 0u; + orig_pixels_0[5] = 0u; + orig_pixels_0[6] = 0u; + orig_pixels_0[7] = 255u; + orig_pixels_0[8] = 0u; + orig_pixels_0[9] = 0u; + orig_pixels_0[10] = 0u; + orig_pixels_0[11] = 0u; + orig_pixels_0[12] = 0u; + orig_pixels_0[13] = 0u; + orig_pixels_0[14] = 0u; + orig_pixels_0[15] = 0u; + + orig_pixels_1[0] = 0u; + orig_pixels_1[1] = 0u; + orig_pixels_1[2] = 0u; + orig_pixels_1[3] = 0u; + orig_pixels_1[4] = 0u; + orig_pixels_1[5] = 0u; + orig_pixels_1[6] = 0u; + orig_pixels_1[7] = 0u; + orig_pixels_1[8] = 0u; + orig_pixels_1[9] = 0u; + orig_pixels_1[10] = 0u; + orig_pixels_1[11] = 0u; + orig_pixels_1[12] = 255u; + orig_pixels_1[13] = 255u; + orig_pixels_1[14] = 255u; + orig_pixels_1[15] = 255u; + 
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 1280, 1, 123); + } +} + #define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \ N, NEG, OFF) \ TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \ @@ -1484,9 +1565,43 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { EXPECT_EQ(0, err); } -TEST_F(LibYUVPlanarTest, TestCopyPlaneZeroDimensionRegressionTest) { - // Regression test to verify copying a rect with a zero height or width does - // not lead to memory corruption. +TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { + int i; + int y_plane_size = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_y, y_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 1, y_plane_size); + memset(dst_opt, 2, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { + // Test to verify copying a rect with a zero height or width does + // not touch destination memory. 
uint8_t src = 42; uint8_t dst = 0; @@ -3509,8 +3624,8 @@ TEST_F(LibYUVPlanarTest, YUY2ToY) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, - benchmark_width_, benchmark_width_, benchmark_height_); + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { @@ -3538,8 +3653,8 @@ TEST_F(LibYUVPlanarTest, UYVYToY) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, - benchmark_width_, benchmark_width_, benchmark_height_); + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 1fb3b2f0a..81c839f41 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -1545,4 +1545,57 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { free_aligned_buffer_page_end(orig_pixels); } +TEST_F(LibYUVScaleTest, PlaneTest1_Box) { + align_buffer_page_end(orig_pixels, 3); + align_buffer_page_end(dst_pixels, 3); + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, + libyuv::kFilterBox); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { + align_buffer_page_end(orig_pixels_alloc, 3 * 2); + align_buffer_page_end(dst_pixels_alloc, 3 * 2); + uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; + uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; + + // Pad the 1x1 16 bit image with invalid values before and after in case + // libyuv reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane_16( + orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels_alloc); + free_aligned_buffer_page_end(orig_pixels_alloc); +} } // namespace libyuv
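Note (annotation, not part of the patch): an end-to-end usage sketch for the I210ToI420 conversion this change introduces (10-bit 4:2:2 to 8-bit 4:2:0; Y is converted with Convert16To8Plane and chroma is vertically downsampled with ScalePlaneVertical_16To8). Buffer names and dimensions are hypothetical; 16-bit plane strides are counted in uint16_t elements.

  #include "libyuv/convert.h"

  enum { kW = 320, kH = 180 };
  static uint16_t src_y[kW * kH];             // I210: 10-bit samples,
  static uint16_t src_u[(kW / 2) * kH];       // chroma subsampled horizontally only
  static uint16_t src_v[(kW / 2) * kH];
  static uint8_t dst_y[kW * kH];              // I420: 8-bit samples,
  static uint8_t dst_u[(kW / 2) * (kH / 2)];  // chroma subsampled both directions
  static uint8_t dst_v[(kW / 2) * (kH / 2)];

  int ConvertFrame() {
    // Returns 0 on success; pass -kH as the height to flip vertically.
    return libyuv::I210ToI420(src_y, kW, src_u, kW / 2, src_v, kW / 2,
                              dst_y, kW, dst_u, kW / 2, dst_v, kW / 2,
                              kW, kH);
  }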