From d011314f14738e0751dcb269c1d989c4dcbaad7b Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 7 Jun 2022 09:06:38 +0000 Subject: [PATCH] Revert "I210ToI420, InterpolatePlane_16, and ScalePlane Vertical-only asan fix" This reverts commit 60254a1d846a93a4d7559009004cdd91bcc04d82. Reason for revert: breaks PaintCanvasVideoRendererTest.HighBitDepth Original change's description: > I210ToI420, InterpolatePlane_16, and ScalePlane Vertical-only asan fix > > - Add I210ToI420 to convert 10 bit 4:2:2 YUV to 4:2:0 8 bit > - Add NEON InterpolateRow_16 for fast 10 bit scaling > - When scaling up, set step to interpolate toward height - 1 to avoid buffer overread > - When scaling down, center the 2 rows used for source to achieve filtering. > - CopyPlane check for 0 size and return > > Bug: libyuv:931, b/228605787, b/233233302, b/233634772, b/234558395, b/234340482 > Change-Id: I63e8580710a57812b683c2fe40583ac5a179c4f1 > Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3687552 > Reviewed-by: Mirko Bonadei > Reviewed-by: richard winterton Bug: libyuv:931, b/228605787, b/233233302, b/233634772, b/234558395, b/234340482 Change-Id: Icc05bb340db0e7fe864061fb501d0a861c764116 No-Presubmit: true No-Tree-Checks: true No-Try: true Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3692886 Reviewed-by: Frank Barchard Commit-Queue: Mirko Bonadei Reviewed-by: Mirko Bonadei --- BUILD.gn | 4 - README.chromium | 2 +- include/libyuv/convert.h | 17 ---- include/libyuv/planar_functions.h | 15 --- include/libyuv/row.h | 52 ++++------ include/libyuv/scale_row.h | 14 --- include/libyuv/version.h | 2 +- source/convert.cc | 54 +--------- source/cpu_id.cc | 2 +- source/planar_functions.cc | 159 +++++++----------------------- source/row_any.cc | 43 ++++---- source/row_common.cc | 58 +---------- source/row_neon.cc | 66 +------------ source/row_neon64.cc | 65 ------------ source/scale.cc | 47 +++------ source/scale_common.cc | 69 +++---------- unit_test/convert_test.cc | 10 +- unit_test/planar_test.cc | 129 ++---------------------- unit_test/scale_test.cc | 53 ---------- 19 files changed, 115 insertions(+), 746 deletions(-) diff --git a/BUILD.gn b/BUILD.gn index a72ff0655..2c9c3e6e4 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -27,10 +27,6 @@ config("libyuv_config") { if (is_android && current_cpu != "arm64") { ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] } - - if (!libyuv_use_neon) { - defines = [ "LIBYUV_DISABLE_NEON" ] - } } # This target is built when no specific target is specified on the command line. diff --git a/README.chromium b/README.chromium index 8461a1218..b6d85b739 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1828 +Version: 1824 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 46d371593..5c2954f4c 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -238,23 +238,6 @@ int I010ToI420(const uint16_t* src_y, int width, int height); -#define H210ToH420 I210ToI420 -LIBYUV_API -int I210ToI420(const uint16_t* src_y, - int src_stride_y, - const uint16_t* src_u, - int src_stride_u, - const uint16_t* src_v, - int src_stride_v, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height); - #define H210ToH422 I210ToI422 LIBYUV_API int I210ToI422(const uint16_t* src_y, diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 1ef2256bf..15c7d457d 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -991,21 +991,6 @@ int InterpolatePlane(const uint8_t* src0, int height, int interpolation); -// Interpolate between two images using specified amount of interpolation -// (0 to 255) and store to destination. -// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 -// and 255 means 1% src0 and 99% src1. -LIBYUV_API -int InterpolatePlane_16(const uint16_t* src0, - int src_stride0, // measured in 16 bit pixels - const uint16_t* src1, - int src_stride1, - uint16_t* dst, - int dst_stride, - int width, - int height, - int interpolation); - // Interpolate between two ARGB images using specified amount of interpolation // Internally calls InterpolatePlane with width * 4 (bpp). LIBYUV_API diff --git a/include/libyuv/row.h b/include/libyuv/row.h index f15b58fab..e51155c08 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -111,7 +111,6 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 -#define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 @@ -124,10 +123,10 @@ extern "C" { #define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 -#define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 +#define HAS_RAWTOYROW_SSSE3 +#define HAS_RGB24TOYROW_SSSE3 #define HAS_RGBATOYROW_SSSE3 #if !defined(LIBYUV_BIT_EXACT) #define HAS_RGB24TOYJROW_SSSE3 @@ -170,6 +169,7 @@ extern "C" { #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 +#define HAS_INTERPOLATEROW_SSSE3 #define HAS_RGBCOLORTABLEROW_X86 #define HAS_SOBELROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2 @@ -410,10 +410,8 @@ extern "C" { // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_AB64TOARGBROW_NEON #define HAS_ABGRTOUVROW_NEON #define HAS_ABGRTOYROW_NEON -#define HAS_AR64TOARGBROW_NEON #define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB1555TOUVROW_NEON #define HAS_ARGB1555TOYROW_NEON @@ -422,14 +420,16 @@ extern "C" { #define HAS_ARGB4444TOYROW_NEON #define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON -#define HAS_ARGBTOAB64ROW_NEON -#define HAS_ARGBTOAR64ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTORAWROW_NEON #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON +#define HAS_ARGBTOAR64ROW_NEON +#define HAS_ARGBTOAB64ROW_NEON +#define HAS_AR64TOARGBROW_NEON +#define HAS_AB64TOARGBROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON @@ -449,6 +449,7 @@ extern "C" { #define HAS_HALFFLOATROW_NEON #define HAS_HALFMERGEUVROW_NEON #define HAS_I400TOARGBROW_NEON +#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON @@ -458,23 +459,20 @@ extern "C" { #define HAS_I422TORGBAROW_NEON #define HAS_I422TOUYVYROW_NEON #define HAS_I422TOYUY2ROW_NEON -#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON -#define HAS_INTERPOLATEROW_16_NEON -#define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON #define HAS_MERGEAR64ROW_NEON #define HAS_MERGEARGB16TO8ROW_NEON #define HAS_MERGEARGBROW_NEON -#define HAS_MERGEUVROW_16_NEON -#define HAS_MERGEUVROW_NEON #define HAS_MERGEXR30ROW_NEON #define HAS_MERGEXR64ROW_NEON #define HAS_MERGEXRGB16TO8ROW_NEON #define HAS_MERGEXRGBROW_NEON +#define HAS_MERGEUVROW_NEON +#define HAS_MERGEUVROW_16_NEON #define HAS_MIRRORROW_NEON -#define HAS_MIRRORSPLITUVROW_NEON #define HAS_MIRRORUVROW_NEON +#define HAS_MIRRORSPLITUVROW_NEON #define HAS_MULTIPLYROW_16_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV12TORGB24ROW_NEON @@ -485,13 +483,13 @@ extern "C" { #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTORGBAROW_NEON -#define HAS_RAWTOUVJROW_NEON #define HAS_RAWTOUVROW_NEON +#define HAS_RAWTOUVJROW_NEON #define HAS_RAWTOYJROW_NEON #define HAS_RAWTOYROW_NEON #define HAS_RGB24TOARGBROW_NEON -#define HAS_RGB24TOUVJROW_NEON #define HAS_RGB24TOUVROW_NEON +#define HAS_RGB24TOUVJROW_NEON #define HAS_RGB24TOYJROW_NEON #define HAS_RGB24TOYROW_NEON #define HAS_RGB565TOARGBROW_NEON @@ -502,10 +500,10 @@ extern "C" { #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON #define HAS_SPLITARGBROW_NEON -#define HAS_SPLITRGBROW_NEON -#define HAS_SPLITUVROW_16_NEON -#define HAS_SPLITUVROW_NEON #define HAS_SPLITXRGBROW_NEON +#define HAS_SPLITRGBROW_NEON +#define HAS_SPLITUVROW_NEON +#define HAS_SPLITUVROW_16_NEON #define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON @@ -530,6 +528,7 @@ extern "C" { #define HAS_ARGBSHADEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON +#define HAS_INTERPOLATEROW_NEON #define HAS_SOBELROW_NEON #define HAS_SOBELTOPLANEROW_NEON #define HAS_SOBELXROW_NEON @@ -5204,23 +5203,6 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); -void InterpolateRow_16_NEON(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction); -void InterpolateRow_16_Any_NEON(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction); - -void InterpolateRow_16To8_C(uint8_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int scale, - int width, - int source_y_fraction); // Sobel images. void SobelXRow_C(const uint8_t* src_y0, diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index 5c474b0ce..cc1c90619 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -200,20 +200,6 @@ void ScalePlaneVertical_16(int src_height, int wpp, enum FilterMode filtering); -void ScalePlaneVertical_16To8(int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_argb, - uint8_t* dst_argb, - int x, - int y, - int dy, - int wpp, - int scale, - enum FilterMode filtering); - // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8f09ccd2b..8afef0ed9 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1828 +#define LIBYUV_VERSION 1824 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 162546e5b..502f002d6 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -15,9 +15,8 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/row.h" -#include "libyuv/scale.h" // For ScalePlane() -#include "libyuv/scale_row.h" // For FixedDiv -#include "libyuv/scale_uv.h" // For UVScale() +#include "libyuv/scale.h" // For ScalePlane() +#include "libyuv/scale_uv.h" // For UVScale() #ifdef __cplusplus namespace libyuv { @@ -221,55 +220,6 @@ int I010ToI420(const uint16_t* src_y, 1, 10); } -LIBYUV_API -int I210ToI420(const uint16_t* src_y, - int src_stride_y, - const uint16_t* src_u, - int src_stride_u, - const uint16_t* src_v, - int src_stride_v, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - const int depth = 10; - const int scale = 1 << (24 - depth); - - if (width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (height - 1) * src_stride_u; - src_v = src_v + (height - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - { - const int uv_width = SUBSAMPLE(width, 1, 1); - const int uv_height = SUBSAMPLE(height, 1, 1); - const int dy = FixedDiv(height, uv_height); - - Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, - height); - ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u, - dst_stride_u, src_u, dst_u, 0, 32768, dy, - /*bpp=*/1, scale, kFilterBilinear); - ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v, - dst_stride_v, src_v, dst_v, 0, 32768, dy, - /*bpp=*/1, scale, kFilterBilinear); - } - return 0; -} - LIBYUV_API int I210ToI422(const uint16_t* src_y, int src_stride_y, diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 56fe60e49..9fce8d204 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -20,7 +20,7 @@ #endif // For ArmCpuCaps() but unittested on all platforms -#include // For fopen() +#include #include #ifdef __cplusplus diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b5344862d..141db6efb 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -35,7 +35,7 @@ void CopyPlane(const uint8_t* src_y, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; - if (width <= 0 || height == 0) { + if (width == 0 || height == 0) { return; } // Negative height means invert the image. @@ -84,6 +84,8 @@ void CopyPlane(const uint8_t* src_y, } } +// TODO(fbarchard): Consider support for negative height. +// TODO(fbarchard): Consider stride measured in bytes. LIBYUV_API void CopyPlane_16(const uint16_t* src_y, int src_stride_y, @@ -91,8 +93,36 @@ void CopyPlane_16(const uint16_t* src_y, int dst_stride_y, int width, int height) { - CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y, - dst_stride_y * 2, width * 2, height); + int y; + void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C; + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_COPYROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) { + CopyRow = CopyRow_16_SSE2; + } +#endif +#if defined(HAS_COPYROW_16_ERMS) + if (TestCpuFlag(kCpuHasERMS)) { + CopyRow = CopyRow_16_ERMS; + } +#endif +#if defined(HAS_COPYROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { + CopyRow = CopyRow_16_NEON; + } +#endif + + // Copy plane + for (y = 0; y < height; ++y) { + CopyRow(src_y, dst_y, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } } // Convert a plane of 16 bit data to 8 bit @@ -108,9 +138,6 @@ void Convert16To8Plane(const uint16_t* src_y, void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -169,9 +196,6 @@ void Convert8To16Plane(const uint8_t* src_y, void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -446,9 +470,6 @@ void SplitUVPlane(const uint8_t* src_uv, int y; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -526,9 +547,6 @@ void MergeUVPlane(const uint8_t* src_u, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -608,9 +626,6 @@ void SplitUVPlane_16(const uint16_t* src_uv, void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u, uint16_t* dst_v, int depth, int width) = SplitUVRow_16_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -668,9 +683,6 @@ void MergeUVPlane_16(const uint16_t* src_u, MergeUVRow_16_C; assert(depth >= 8); assert(depth <= 16); - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -723,9 +735,6 @@ void ConvertToMSBPlane_16(const uint16_t* src_y, int scale = 1 << (16 - depth); void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = MultiplyRow_16_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -776,9 +785,6 @@ void ConvertToLSBPlane_16(const uint16_t* src_y, int scale = 1 << depth; void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = DivideRow_16_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -827,9 +833,6 @@ void SwapUVPlane(const uint8_t* src_uv, int y; void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = SwapUVRow_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -933,9 +936,6 @@ void DetilePlane(const uint8_t* src_y, assert(tile_height > 0); assert(src_stride_y > 0); - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -991,9 +991,6 @@ void DetileSplitUVPlane(const uint8_t* src_uv, assert(tile_height > 0); assert(src_stride_uv > 0); - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -1049,9 +1046,6 @@ void SplitRGBPlane(const uint8_t* src_rgb, int y; void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitRGBRow_C; - if (width <= 0 || height == 0) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; @@ -1111,9 +1105,6 @@ void MergeRGBPlane(const uint8_t* src_r, void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; - if (width <= 0 || height == 0) { - return; - } // Coalesce rows. // Negative height means invert the image. if (height < 0) { @@ -3068,10 +3059,6 @@ void SetPlane(uint8_t* dst_y, uint32_t value) { int y; void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; - - if (width <= 0 || height == 0) { - return; - } if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -4018,86 +4005,6 @@ int InterpolatePlane(const uint8_t* src0, return 0; } -// Interpolate 2 planes by specified amount (0 to 255). -LIBYUV_API -int InterpolatePlane_16(const uint16_t* src0, - int src_stride0, - const uint16_t* src1, - int src_stride1, - uint16_t* dst, - int dst_stride, - int width, - int height, - int interpolation) { - int y; - void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) = InterpolateRow_16_C; - if (!src0 || !src1 || !dst || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst = dst + (height - 1) * dst_stride; - dst_stride = -dst_stride; - } - // Coalesce rows. - if (src_stride0 == width && src_stride1 == width && dst_stride == width) { - width *= height; - height = 1; - src_stride0 = src_stride1 = dst_stride = 0; - } -#if defined(HAS_INTERPOLATEROW_16_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow_16 = InterpolateRow_16_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - InterpolateRow_16 = InterpolateRow_16_SSSE3; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_16_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow_16 = InterpolateRow_16_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - InterpolateRow_16 = InterpolateRow_16_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_16_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow_16 = InterpolateRow_16_Any_NEON; - if (IS_ALIGNED(width, 8)) { - InterpolateRow_16 = InterpolateRow_16_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_16_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow_16 = InterpolateRow_16_Any_MSA; - if (IS_ALIGNED(width, 32)) { - InterpolateRow_16 = InterpolateRow_16_MSA; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_16_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - InterpolateRow_16 = InterpolateRow_16_Any_LSX; - if (IS_ALIGNED(width, 32)) { - InterpolateRow_16 = InterpolateRow_16_LSX; - } - } -#endif - - for (y = 0; y < height; ++y) { - InterpolateRow_16(dst, src0, src1 - src0, width, interpolation); - src0 += src_stride0; - src1 += src_stride1; - dst += dst_stride; - } - return 0; -} - // Interpolate 2 ARGB images by specified amount (0 to 255). LIBYUV_API int ARGBInterpolate(const uint8_t* src_argb0, diff --git a/source/row_any.cc b/source/row_any.cc index 3e95b2df4..089e518af 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -1625,42 +1625,37 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) #undef ANY11C // Any 1 to 1 interpolate. Takes 2 rows of source via stride. -#define ANY11I(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ - void NAMEANY(T* dst_ptr, const T* src_ptr, ptrdiff_t src_stride, int width, \ - int source_y_fraction) { \ - SIMD_ALIGNED(T temp[64 * 3]); \ - memset(temp, 0, 64 * 2 * sizeof(T)); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP * sizeof(T)); \ - memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP * sizeof(T)); \ - ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP * sizeof(T)); \ +#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ + void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \ + int width, int source_y_fraction) { \ + SIMD_ALIGNED(uint8_t temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ + memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \ + ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } #ifdef HAS_INTERPOLATEROW_AVX2 -ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, 1, 1, 31) +ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_SSSE3 -ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, uint8_t, 1, 1, 15) +ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_NEON -ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, 1, 1, 15) +ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_MSA -ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, 1, 1, 31) +ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_LSX -ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, 1, 1, 31) +ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, 1, 1, 31) #endif - -#ifdef HAS_INTERPOLATEROW_16_NEON -ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, 1, 1, 7) -#endif - #undef ANY11I // Any 1 to 1 mirror. diff --git a/source/row_common.cc b/source/row_common.cc index 3bfc56180..8cf826ec5 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -11,6 +11,7 @@ #include "libyuv/row.h" #include +#include #include // For memcpy and memset. #include "libyuv/basic_types.h" @@ -3401,18 +3402,6 @@ static void HalfRow_16_C(const uint16_t* src_uv, } } -static void HalfRow_16To8_C(const uint16_t* src_uv, - ptrdiff_t src_uv_stride, - uint8_t* dst_uv, - int scale, - int width) { - int x; - for (x = 0; x < width; ++x) { - dst_uv[x] = clamp255( - (((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1) * scale) >> 16); - } -} - // C version 2x2 -> 2x1. void InterpolateRow_C(uint8_t* dst_ptr, const uint8_t* src_ptr, @@ -3446,51 +3435,6 @@ void InterpolateRow_C(uint8_t* dst_ptr, } } -// C version 2x2 16 bit-> 2x1 8 bit. -// Use scale to convert lsb formats to msb, depending how many bits there are: -// 32768 = 9 bits -// 16384 = 10 bits -// 4096 = 12 bits -// 256 = 16 bits -void InterpolateRow_16To8_C(uint8_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int scale, - int width, - int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - const uint16_t* src_ptr1 = src_ptr + src_stride; - int x; - if (source_y_fraction == 0) { - Convert16To8Row_C(src_ptr, dst_ptr, scale, width); - return; - } - if (source_y_fraction == 128) { - HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width); - return; - } - for (x = 0; x < width - 1; x += 2) { - dst_ptr[0] = clamp255( - (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) * - scale) >> - 16); - dst_ptr[1] = clamp255( - (((src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8) * - scale) >> - 16); - src_ptr += 2; - src_ptr1 += 2; - dst_ptr += 2; - } - if (width & 1) { - dst_ptr[0] = clamp255( - (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) * - scale) >> - 16); - } -} - void InterpolateRow_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, diff --git a/source/row_neon.cc b/source/row_neon.cc index 297ccceb6..8ba71d07e 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -10,6 +10,8 @@ #include "libyuv/row.h" +#include + #ifdef __cplusplus namespace libyuv { extern "C" { @@ -19,8 +21,6 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) -// d8-d15, r4-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved. - // q0: Y uint16x8_t // d2: U uint8x8_t // d3: V uint8x8_t @@ -2715,66 +2715,6 @@ void InterpolateRow_NEON(uint8_t* dst_ptr, : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"); } -// Bilinear filter 8x2 -> 8x1 -void InterpolateRow_16_NEON(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - const uint16_t* src_ptr1 = src_ptr + src_stride; - - asm volatile( - "cmp %4, #0 \n" - "beq 100f \n" - "cmp %4, #128 \n" - "beq 50f \n" - - "vdup.16 d17, %4 \n" - "vdup.16 d16, %5 \n" - // General purpose row blend. - "1: \n" - "vld1.16 {q0}, [%1]! \n" - "vld1.16 {q1}, [%2]! \n" - "subs %3, %3, #8 \n" - "vmull.u16 q2, d0, d16 \n" - "vmull.u16 q3, d1, d16 \n" - "vmlal.u16 q2, d2, d17 \n" - "vmlal.u16 q3, d3, d17 \n" - "vrshrn.u32 d0, q2, #8 \n" - "vrshrn.u32 d1, q3, #8 \n" - "vst1.16 {q0}, [%0]! \n" - "bgt 1b \n" - "b 99f \n" - - // Blend 50 / 50. - "50: \n" - "vld1.16 {q0}, [%1]! \n" - "vld1.16 {q1}, [%2]! \n" - "subs %3, %3, #8 \n" - "vrhadd.u16 q0, q1 \n" - "vst1.16 {q0}, [%0]! \n" - "bgt 50b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "vld1.16 {q0}, [%1]! \n" - "subs %3, %3, #8 \n" - "vst1.16 {q0}, [%0]! \n" - "bgt 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_ptr1), // %2 - "+r"(dst_width) // %3 - : "r"(y1_fraction), // %4 - "r"(y0_fraction) // %5 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8"); -} - // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, @@ -3726,7 +3666,7 @@ void Convert16To8Row_NEON(const uint16_t* src_y, "vqdmulh.s16 q1, q1, q2 \n" "vqshrn.u16 d0, q0, #1 \n" "vqshrn.u16 d1, q1, #1 \n" - "vst1.8 {q0}, [%1]! \n" + "vst1.16 {q0}, [%1]! \n" "subs %3, %3, #16 \n" // 16 src pixels per loop "bgt 1b \n" : "+r"(src_y), // %0 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 6135014b7..8d43d5940 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2966,71 +2966,6 @@ void InterpolateRow_NEON(uint8_t* dst_ptr, : "cc", "memory", "v0", "v1", "v3", "v4", "v5"); } -// Bilinear filter 8x2 -> 8x1 -void InterpolateRow_16_NEON(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - const uint16_t* src_ptr1 = src_ptr + src_stride; - - asm volatile( - "cmp %w4, #0 \n" - "b.eq 100f \n" - "cmp %w4, #128 \n" - "b.eq 50f \n" - - "dup v5.8h, %w4 \n" - "dup v4.8h, %w5 \n" - // General purpose row blend. - "1: \n" - "ld1 {v0.8h}, [%1], #16 \n" - "ld1 {v1.8h}, [%2], #16 \n" - "subs %w3, %w3, #8 \n" - "umull v2.4s, v0.4h, v4.4h \n" - "prfm pldl1keep, [%1, 448] \n" - "umull2 v3.4s, v0.8h, v4.8h \n" - "prfm pldl1keep, [%2, 448] \n" - "umlal v2.4s, v1.4h, v5.4h \n" - "umlal2 v3.4s, v1.8h, v5.8h \n" - "rshrn v0.4h, v2.4s, #8 \n" - "rshrn2 v0.8h, v3.4s, #8 \n" - "st1 {v0.8h}, [%0], #16 \n" - "b.gt 1b \n" - "b 99f \n" - - // Blend 50 / 50. - "50: \n" - "ld1 {v0.8h}, [%1], #16 \n" - "ld1 {v1.8h}, [%2], #16 \n" - "subs %w3, %w3, #8 \n" - "prfm pldl1keep, [%1, 448] \n" - "urhadd v0.8h, v0.8h, v1.8h \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.8h}, [%0], #16 \n" - "b.gt 50b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "ld1 {v0.8h}, [%1], #16 \n" - "subs %w3, %w3, #8 \n" - "prfm pldl1keep, [%1, 448] \n" - "st1 {v0.8h}, [%0], #16 \n" - "b.gt 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_ptr1), // %2 - "+r"(dst_width) // %3 - : "r"(y1_fraction), // %4 - "r"(y0_fraction) // %5 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"); -} - // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, diff --git a/source/scale.cc b/source/scale.cc index ac009310f..ad573ef6a 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -29,7 +29,6 @@ static __inline int Abs(int v) { } #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) -#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) // Scale plane, 1/2 // This is an optimized version for scaling down a plane to 1/2 of @@ -1155,7 +1154,7 @@ void ScalePlaneBilinearDown_16(int src_width, #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_16_Any_SSE2; + InterpolateRow = InterpolateRow_Any_16_SSE2; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } @@ -1163,7 +1162,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_16_Any_SSSE3; + InterpolateRow = InterpolateRow_Any_16_SSSE3; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } @@ -1171,7 +1170,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_16_Any_AVX2; + InterpolateRow = InterpolateRow_Any_16_AVX2; if (IS_ALIGNED(src_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } @@ -1179,7 +1178,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_16_Any_NEON; + InterpolateRow = InterpolateRow_Any_16_NEON; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } @@ -1707,7 +1706,7 @@ void ScalePlaneBilinearUp_16(int src_width, #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_16_Any_SSE2; + InterpolateRow = InterpolateRow_Any_16_SSE2; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } @@ -1715,7 +1714,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_16_Any_SSSE3; + InterpolateRow = InterpolateRow_Any_16_SSSE3; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } @@ -1723,7 +1722,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_16_Any_AVX2; + InterpolateRow = InterpolateRow_Any_16_AVX2; if (IS_ALIGNED(dst_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } @@ -1731,7 +1730,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_16_Any_NEON; + InterpolateRow = InterpolateRow_Any_16_NEON; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } @@ -1887,6 +1886,7 @@ static void ScalePlaneSimple_16(int src_width, // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. + LIBYUV_API void ScalePlane(const uint8_t* src, int src_stride, @@ -1916,19 +1916,10 @@ void ScalePlane(const uint8_t* src, return; } if (dst_width == src_width && filtering != kFilterBox) { - int dy = 0; - int y = 0; - // When scaling down, use the center 2 rows to filter. - // When scaling up, last row of destination uses the last 2 source rows. - if (dst_height <= src_height) { - dy = FixedDiv(src_height, dst_height); - y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. - } else if (src_height > 1 && dst_height > 1) { - dy = FixedDiv1(src_height, dst_height); - } + int dy = FixedDiv(src_height, dst_height); // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); + dst_stride, src, dst, 0, 0, dy, /*bpp=*/1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { @@ -2019,22 +2010,10 @@ void ScalePlane_16(const uint16_t* src, return; } if (dst_width == src_width && filtering != kFilterBox) { - int dy = 0; - int y = 0; - // When scaling down, use the center 2 rows to filter. - // When scaling up, last row of destination uses the last 2 source rows. - if (dst_height <= src_height) { - dy = FixedDiv(src_height, dst_height); - y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. - // When scaling up, ensure the last row of destination uses the last - // source. Avoid divide by zero for dst_height but will do no scaling - // later. - } else if (src_height > 1 && dst_height > 1) { - dy = FixedDiv1(src_height, dst_height); - } + int dy = FixedDiv(src_height, dst_height); // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); + dst_stride, src, dst, 0, 0, dy, /*bpp=*/1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { diff --git a/source/scale_common.cc b/source/scale_common.cc index e0c821c27..d54ab9423 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -1533,7 +1533,6 @@ void ScalePlaneVertical(int src_height, y += dy; } } - void ScalePlaneVertical_16(int src_height, int dst_width, int dst_height, @@ -1544,7 +1543,7 @@ void ScalePlaneVertical_16(int src_height, int x, int y, int dy, - int wpp, /* words per pixel. normally 1 */ + int wpp, enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; @@ -1560,32 +1559,32 @@ void ScalePlaneVertical_16(int src_height, src_argb += (x >> 16) * wpp; #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_16_Any_SSE2; - if (IS_ALIGNED(dst_width_words, 16)) { + InterpolateRow = InterpolateRow_Any_16_SSE2; + if (IS_ALIGNED(dst_width_bytes, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } } #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_16_Any_SSSE3; - if (IS_ALIGNED(dst_width_words, 16)) { + InterpolateRow = InterpolateRow_Any_16_SSSE3; + if (IS_ALIGNED(dst_width_bytes, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_16_Any_AVX2; - if (IS_ALIGNED(dst_width_words, 32)) { + InterpolateRow = InterpolateRow_Any_16_AVX2; + if (IS_ALIGNED(dst_width_bytes, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_16_Any_NEON; - if (IS_ALIGNED(dst_width_words, 8)) { + InterpolateRow = InterpolateRow_Any_16_NEON; + if (IS_ALIGNED(dst_width_bytes, 16)) { InterpolateRow = InterpolateRow_16_NEON; } } @@ -1605,48 +1604,6 @@ void ScalePlaneVertical_16(int src_height, } } -void ScalePlaneVertical_16To8(int src_height, - int dst_width, - int dst_height, - int src_stride, - int dst_stride, - const uint16_t* src_argb, - uint8_t* dst_argb, - int x, - int y, - int dy, - int wpp, /* words per pixel. normally 1 */ - int scale, - enum FilterMode filtering) { - // TODO(fbarchard): Allow higher wpp. - int dst_width_words = dst_width * wpp; - // TODO(https://crbug.com/libyuv/931): Add NEON and AVX2 versions. - void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb, - ptrdiff_t src_stride, int scale, int dst_width, - int source_y_fraction) = InterpolateRow_16To8_C; - const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; - int j; - assert(wpp >= 1 && wpp <= 2); - assert(src_height != 0); - assert(dst_width > 0); - assert(dst_height > 0); - src_argb += (x >> 16) * wpp; - - for (j = 0; j < dst_height; ++j) { - int yi; - int yf; - if (y > max_y) { - y = max_y; - } - yi = y >> 16; - yf = filtering ? ((y >> 8) & 255) : 0; - InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride, - scale, dst_width_words, yf); - dst_argb += dst_stride; - y += dy; - } -} - // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, @@ -1696,7 +1653,7 @@ int FixedDiv_C(int num, int div) { return (int)(((int64_t)(num) << 16) / div); } -// Divide num - 1 by div - 1 and return as 16.16 fixed point result. +// Divide num by div and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div) { return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); } @@ -1739,14 +1696,14 @@ void ScaleSlope(int src_width, if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. - } else if (src_width > 1 && dst_width > 1) { + } else if (dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } if (dst_height <= src_height) { *dy = FixedDiv(src_height, dst_height); *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. - } else if (src_height > 1 && dst_height > 1) { + } else if (dst_height > 1) { *dy = FixedDiv1(src_height, dst_height); *y = 0; } @@ -1755,7 +1712,7 @@ void ScaleSlope(int src_width, if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. - } else if (src_width > 1 && dst_width > 1) { + } else if (dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index c2d952200..185c5aa44 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -178,7 +178,6 @@ TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) -TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) @@ -2271,8 +2270,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { free_aligned_buffer_page_end(dst_vu); } -// TODO(fbarchard): Improve test to compare against I422, not checksum -TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { +TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); @@ -2296,13 +2294,13 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 493520167u); + EXPECT_EQ(dst_uv_hash, 3543430771u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); } -TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { +TEST_F(LibYUVConvertTest, TestMJPGToNV12_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); @@ -2329,7 +2327,7 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 493520167u); + EXPECT_EQ(dst_vu_hash, 3543430771u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 42166d0d9..8f5a33cb1 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1080,87 +1080,6 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { } } -TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { - SIMD_ALIGNED(uint16_t orig_pixels_0[1280]); - SIMD_ALIGNED(uint16_t orig_pixels_1[1280]); - SIMD_ALIGNED(uint16_t interpolate_pixels[1280]); - memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); - memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); - - orig_pixels_0[0] = 16u; - orig_pixels_0[1] = 32u; - orig_pixels_0[2] = 64u; - orig_pixels_0[3] = 128u; - orig_pixels_0[4] = 0u; - orig_pixels_0[5] = 0u; - orig_pixels_0[6] = 0u; - orig_pixels_0[7] = 255u; - orig_pixels_0[8] = 0u; - orig_pixels_0[9] = 0u; - orig_pixels_0[10] = 0u; - orig_pixels_0[11] = 0u; - orig_pixels_0[12] = 0u; - orig_pixels_0[13] = 0u; - orig_pixels_0[14] = 0u; - orig_pixels_0[15] = 0u; - - orig_pixels_1[0] = 0u; - orig_pixels_1[1] = 0u; - orig_pixels_1[2] = 0u; - orig_pixels_1[3] = 0u; - orig_pixels_1[4] = 0u; - orig_pixels_1[5] = 0u; - orig_pixels_1[6] = 0u; - orig_pixels_1[7] = 0u; - orig_pixels_1[8] = 0u; - orig_pixels_1[9] = 0u; - orig_pixels_1[10] = 0u; - orig_pixels_1[11] = 0u; - orig_pixels_1[12] = 255u; - orig_pixels_1[13] = 255u; - orig_pixels_1[14] = 255u; - orig_pixels_1[15] = 255u; - - InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, - &interpolate_pixels[0], 0, 16, 1, 128); - EXPECT_EQ(8u, interpolate_pixels[0]); - EXPECT_EQ(16u, interpolate_pixels[1]); - EXPECT_EQ(32u, interpolate_pixels[2]); - EXPECT_EQ(64u, interpolate_pixels[3]); - EXPECT_EQ(0u, interpolate_pixels[4]); - EXPECT_EQ(0u, interpolate_pixels[5]); - EXPECT_EQ(0u, interpolate_pixels[6]); - EXPECT_EQ(128u, interpolate_pixels[7]); - EXPECT_EQ(0u, interpolate_pixels[8]); - EXPECT_EQ(0u, interpolate_pixels[9]); - EXPECT_EQ(0u, interpolate_pixels[10]); - EXPECT_EQ(0u, interpolate_pixels[11]); - EXPECT_EQ(128u, interpolate_pixels[12]); - EXPECT_EQ(128u, interpolate_pixels[13]); - EXPECT_EQ(128u, interpolate_pixels[14]); - EXPECT_EQ(128u, interpolate_pixels[15]); - - InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, - &interpolate_pixels[0], 0, 16, 1, 0); - EXPECT_EQ(16u, interpolate_pixels[0]); - EXPECT_EQ(32u, interpolate_pixels[1]); - EXPECT_EQ(64u, interpolate_pixels[2]); - EXPECT_EQ(128u, interpolate_pixels[3]); - - InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, - &interpolate_pixels[0], 0, 16, 1, 192); - - EXPECT_EQ(4u, interpolate_pixels[0]); - EXPECT_EQ(8u, interpolate_pixels[1]); - EXPECT_EQ(16u, interpolate_pixels[2]); - EXPECT_EQ(32u, interpolate_pixels[3]); - - for (int i = 0; i < benchmark_pixels_div1280_; ++i) { - InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, - &interpolate_pixels[0], 0, 1280, 1, 123); - } -} - #define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \ N, NEG, OFF) \ TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \ @@ -1565,43 +1484,9 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { EXPECT_EQ(0, err); } -TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { - int i; - int y_plane_size = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_y, y_plane_size); - align_buffer_page_end(dst_c, y_plane_size); - align_buffer_page_end(dst_opt, y_plane_size); - - MemRandomize(orig_y, y_plane_size); - memset(dst_c, 1, y_plane_size); - memset(dst_opt, 2, y_plane_size); - - // Disable all optimizations. - MaskCpuFlags(disable_cpu_flags_); - for (i = 0; i < benchmark_iterations_; i++) { - CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_, - benchmark_width_, benchmark_height_); - } - - // Enable optimizations. - MaskCpuFlags(benchmark_cpu_info_); - for (i = 0; i < benchmark_iterations_; i++) { - CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_, - benchmark_width_, benchmark_height_); - } - - for (i = 0; i < y_plane_size; ++i) { - EXPECT_EQ(dst_c[i], dst_opt[i]); - } - - free_aligned_buffer_page_end(orig_y); - free_aligned_buffer_page_end(dst_c); - free_aligned_buffer_page_end(dst_opt); -} - -TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { - // Test to verify copying a rect with a zero height or width does - // not touch destination memory. +TEST_F(LibYUVPlanarTest, TestCopyPlaneZeroDimensionRegressionTest) { + // Regression test to verify copying a rect with a zero height or width does + // not lead to memory corruption. uint8_t src = 42; uint8_t dst = 0; @@ -3624,8 +3509,8 @@ TEST_F(LibYUVPlanarTest, YUY2ToY) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, - benchmark_width_, benchmark_height_); + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, + benchmark_width_, benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { @@ -3653,8 +3538,8 @@ TEST_F(LibYUVPlanarTest, UYVYToY) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, - benchmark_width_, benchmark_height_); + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, + benchmark_width_, benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 81c839f41..1fb3b2f0a 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -1545,57 +1545,4 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { free_aligned_buffer_page_end(orig_pixels); } -TEST_F(LibYUVScaleTest, PlaneTest1_Box) { - align_buffer_page_end(orig_pixels, 3); - align_buffer_page_end(dst_pixels, 3); - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. - orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, - libyuv::kFilterBox); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels); - free_aligned_buffer_page_end(orig_pixels); -} - -TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { - align_buffer_page_end(orig_pixels_alloc, 3 * 2); - align_buffer_page_end(dst_pixels_alloc, 3 * 2); - uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; - uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; - - // Pad the 1x1 byte image with invalid values before and after in case libyuv - // reads outside the memory boundaries. - orig_pixels[0] = 0; - orig_pixels[1] = 1; // scale this pixel - orig_pixels[2] = 2; - dst_pixels[0] = 3; - dst_pixels[1] = 3; - dst_pixels[2] = 3; - - libyuv::ScalePlane_16( - orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, - /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, - /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); - - EXPECT_EQ(dst_pixels[0], 1); - EXPECT_EQ(dst_pixels[1], 1); - EXPECT_EQ(dst_pixels[2], 3); - - free_aligned_buffer_page_end(dst_pixels_alloc); - free_aligned_buffer_page_end(orig_pixels_alloc); -} } // namespace libyuv