From ebe6fef90344936e2cd623f727024482fca72a87 Mon Sep 17 00:00:00 2001 From: Wan-Teh Chang Date: Thu, 28 May 2026 13:56:44 -0700 Subject: [PATCH] Fix integer overflow in multiplications of stride Audit all occurrences of "stride *" in the libyuv source tree. Ensure that these multiplications are performed in the ptrdiff_t type. For functions not declared in a public header (such as static functions), prefer to declare the stride parameters (typically named src_stride and dst_stride) and related stride local variables as ptrdiff_t. If this is not possible, add ptrdiff_t casts to the stride parameters in multiplications. If intptr_t or int64_t casts were used, change them to ptrdiff_t casts. Bug: chromium:516986556 Change-Id: I6cd8a8eb00cbb5380db828bf83e4d89ff95891f3 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7882967 Commit-Queue: Wan-Teh Chang Reviewed-by: Frank Barchard --- source/convert_from.cc | 30 ++-- source/planar_functions.cc | 2 +- source/rotate.cc | 18 +- source/rotate_common.cc | 10 +- source/rotate_neon.cc | 20 +-- source/rotate_neon64.cc | 20 +-- source/row_neon64.cc | 8 +- source/scale.cc | 150 ++++++++-------- source/scale_argb.cc | 327 +++------------------------------- source/scale_uv.cc | 93 +++++----- unit_test/scale_plane_test.cc | 102 +++++++++++ 11 files changed, 308 insertions(+), 472 deletions(-) diff --git a/source/convert_from.cc b/source/convert_from.cc index 5cf88fa2d..363edc252 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -10,6 +10,8 @@ #include "libyuv/convert_from.h" +#include <stddef.h> + #include "libyuv/basic_types.h" #include "libyuv/convert.h" // For I420Copy #include "libyuv/cpu_id.h" @@ -782,7 +784,7 @@ int ConvertFromI420(const uint8_t* y, break; case FOURCC_NV12: { int dst_y_stride = dst_sample_stride ? 
dst_sample_stride : width; - uint8_t* dst_uv = dst_sample + dst_y_stride * height; + uint8_t* dst_uv = dst_sample + (ptrdiff_t)dst_y_stride * height; r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_uv, dst_sample_stride ? dst_sample_stride : width, width, @@ -791,7 +793,7 @@ int ConvertFromI420(const uint8_t* y, } case FOURCC_NV21: { int dst_y_stride = dst_sample_stride ? dst_sample_stride : width; - uint8_t* dst_vu = dst_sample + dst_y_stride * height; + uint8_t* dst_vu = dst_sample + (ptrdiff_t)dst_y_stride * height; r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_vu, dst_sample_stride ? dst_sample_stride : width, width, @@ -807,11 +809,11 @@ int ConvertFromI420(const uint8_t* y, uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV12) { - dst_v = dst_sample + dst_sample_stride * height; - dst_u = dst_v + halfstride * halfheight; + dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height; + dst_u = dst_v + (ptrdiff_t)halfstride * halfheight; } else { - dst_u = dst_sample + dst_sample_stride * height; - dst_v = dst_u + halfstride * halfheight; + dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height; + dst_v = dst_u + (ptrdiff_t)halfstride * halfheight; } r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, halfstride, dst_v, halfstride, @@ -825,11 +827,11 @@ int ConvertFromI420(const uint8_t* y, uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV16) { - dst_v = dst_sample + dst_sample_stride * height; - dst_u = dst_v + halfstride * height; + dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height; + dst_u = dst_v + (ptrdiff_t)halfstride * height; } else { - dst_u = dst_sample + dst_sample_stride * height; - dst_v = dst_u + halfstride * height; + dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height; + dst_v = dst_u + (ptrdiff_t)halfstride * height; } r = I420ToI422(y, 
y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, halfstride, dst_v, halfstride, @@ -842,11 +844,11 @@ int ConvertFromI420(const uint8_t* y, uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV24) { - dst_v = dst_sample + dst_sample_stride * height; - dst_u = dst_v + dst_sample_stride * height; + dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height; + dst_u = dst_v + (ptrdiff_t)dst_sample_stride * height; } else { - dst_u = dst_sample + dst_sample_stride * height; - dst_v = dst_u + dst_sample_stride * height; + dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height; + dst_v = dst_u + (ptrdiff_t)dst_sample_stride * height; } r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, dst_sample_stride, dst_v, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 149dde3a1..016ea24e0 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -3088,7 +3088,7 @@ int I420Blend(const uint8_t* src_y0, } // Subsample 2 rows of UV to half width and half height. ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth); - alpha += alpha_stride * 2; + alpha += (ptrdiff_t)alpha_stride * 2; BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth); BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth); src_u0 += src_stride_u0; diff --git a/source/rotate.cc b/source/rotate.cc index 520806236..d51b313be 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -128,7 +128,7 @@ void RotatePlane90(const uint8_t* src, // Rotate by 90 is a transpose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. 
- src += src_stride * (height - 1); + src += (ptrdiff_t)src_stride * (height - 1); src_stride = -src_stride; TransposePlane(src, src_stride, dst, dst_stride, width, height); } @@ -143,7 +143,7 @@ void RotatePlane270(const uint8_t* src, // Rotate by 270 is a transpose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. - dst += dst_stride * (width - 1); + dst += (ptrdiff_t)dst_stride * (width - 1); dst_stride = -dst_stride; TransposePlane(src, src_stride, dst, dst_stride, width, height); } @@ -160,8 +160,8 @@ void RotatePlane180(const uint8_t* src, assert(row); if (!row) return; - const uint8_t* src_bot = src + src_stride * (height - 1); - uint8_t* dst_bot = dst + dst_stride * (height - 1); + const uint8_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1); + uint8_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; @@ -354,7 +354,7 @@ void SplitRotateUV90(const uint8_t* src, int dst_stride_b, int width, int height) { - src += src_stride * (height - 1); + src += (ptrdiff_t)src_stride * (height - 1); src_stride = -src_stride; SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, @@ -533,7 +533,7 @@ static void RotatePlane90_16(const uint16_t* src, // Rotate by 90 is a transpose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. - src += src_stride * (height - 1); + src += (ptrdiff_t)src_stride * (height - 1); src_stride = -src_stride; TransposePlane_16(src, src_stride, dst, dst_stride, width, height); } @@ -547,7 +547,7 @@ static void RotatePlane270_16(const uint16_t* src, // Rotate by 270 is a transpose with the destination written // from bottom to top. 
So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. - dst += dst_stride * (width - 1); + dst += (ptrdiff_t)dst_stride * (width - 1); dst_stride = -dst_stride; TransposePlane_16(src, src_stride, dst, dst_stride, width, height); } @@ -558,8 +558,8 @@ static void RotatePlane180_16(const uint16_t* src, int dst_stride, int width, int height) { - const uint16_t* src_bot = src + src_stride * (height - 1); - uint16_t* dst_bot = dst + dst_stride * (height - 1); + const uint16_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1); + uint16_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; diff --git a/source/rotate_common.cc b/source/rotate_common.cc index e0341fec4..ad420489c 100644 --- a/source/rotate_common.cc +++ b/source/rotate_common.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <stddef.h> + #include "libyuv/rotate_row.h" #include "libyuv/row.h" @@ -191,10 +193,10 @@ void Transpose4x4_32_C(const uint8_t* src, ((uint32_t*)(dst3))[1] = p31; ((uint32_t*)(dst3))[2] = p32; ((uint32_t*)(dst3))[3] = p33; - src += src_stride * 4; // advance 4 rows - src1 += src_stride * 4; - src2 += src_stride * 4; - src3 += src_stride * 4; + src += (ptrdiff_t)src_stride * 4; // advance 4 rows + src1 += (ptrdiff_t)src_stride * 4; + src2 += (ptrdiff_t)src_stride * 4; + src3 += (ptrdiff_t)src_stride * 4; dst += 4 * 4; // advance 4 columns dst1 += 4 * 4; dst2 += 4 * 4; diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc index 27bd2251b..de14c41b0 100644 --- a/source/rotate_neon.cc +++ b/source/rotate_neon.cc @@ -198,16 +198,16 @@ void Transpose4x4_32_NEON(const uint8_t* src, "vst1.8 {q3}, [%7]! 
\n" "bgt 1b \n" - : "+r"(src), // %0 - "+r"(src1), // %1 - "+r"(src2), // %2 - "+r"(src3), // %3 - "+r"(dst), // %4 - "+r"(dst1), // %5 - "+r"(dst2), // %6 - "+r"(dst3), // %7 - "+r"(width) // %8 - : "r"((ptrdiff_t)(src_stride * 4)) // %9 + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(dst1), // %5 + "+r"(dst2), // %6 + "+r"(dst3), // %7 + "+r"(width) // %8 + : "r"((ptrdiff_t)src_stride * 4) // %9 : "memory", "cc", "q0", "q1", "q2", "q3"); } diff --git a/source/rotate_neon64.cc b/source/rotate_neon64.cc index e09bcb178..14f31d94c 100644 --- a/source/rotate_neon64.cc +++ b/source/rotate_neon64.cc @@ -252,16 +252,16 @@ void Transpose4x4_32_NEON(const uint8_t* src, "st1 {v2.4s}, [%6], 16 \n" "st1 {v3.4s}, [%7], 16 \n" "b.gt 1b \n" - : "+r"(src), // %0 - "+r"(src1), // %1 - "+r"(src2), // %2 - "+r"(src3), // %3 - "+r"(dst), // %4 - "+r"(dst1), // %5 - "+r"(dst2), // %6 - "+r"(dst3), // %7 - "+r"(width) // %8 - : "r"((ptrdiff_t)(src_stride * 4)) // %9 + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(dst1), // %5 + "+r"(dst2), // %6 + "+r"(dst3), // %7 + "+r"(width) // %8 + : "r"((ptrdiff_t)src_stride * 4) // %9 : "memory", "cc", "v0", "v1", "v2", "v3"); } diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 4eed2df12..65d7b65a5 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -4751,10 +4751,10 @@ void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16 "str s2, [%1], #4 \n" // store 1 floats "b.gt 2b \n" "3: \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((ptrdiff_t)(src_stride * 2)) // %3 + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((ptrdiff_t)src_stride * 2) // %3 : "cc", "memory", "v0", "v1", "v2", "v3"); } diff --git a/source/scale.cc b/source/scale.cc index 7e762a1f4..a78356328 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -39,8 +39,8 @@ static void 
ScalePlaneDown2(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -51,7 +51,7 @@ static void ScalePlaneDown2(int src_width, ? ScaleRowDown2_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C); - int row_stride = src_stride * 2; + ptrdiff_t row_stride = src_stride * 2; (void)src_width; (void)src_height; if (!filtering) { @@ -151,8 +151,8 @@ static void ScalePlaneDown2_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -163,7 +163,7 @@ static void ScalePlaneDown2_16(int src_width, ? ScaleRowDown2_16_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C : ScaleRowDown2Box_16_C); - int row_stride = src_stride * 2; + ptrdiff_t row_stride = src_stride * 2; (void)src_width; (void)src_height; if (!filtering) { @@ -228,7 +228,7 @@ void ScalePlaneDown2_16To8(int src_width, ? ScaleRowDown2_16To8_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C : ScaleRowDown2Box_16To8_C)); - int row_stride = src_stride * 2; + ptrdiff_t row_stride = (ptrdiff_t)src_stride * 2; (void)dst_height; if (!filtering) { src_ptr += src_stride; // Point to odd rows. @@ -259,8 +259,8 @@ static void ScalePlaneDown4(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -268,7 +268,7 @@ static void ScalePlaneDown4(int src_width, void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) = filtering ? 
ScaleRowDown4Box_C : ScaleRowDown4_C; - int row_stride = src_stride * 4; + ptrdiff_t row_stride = src_stride * 4; (void)src_width; (void)src_height; if (!filtering) { @@ -331,8 +331,8 @@ static void ScalePlaneDown4_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -340,7 +340,7 @@ static void ScalePlaneDown4_16(int src_width, void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; - int row_stride = src_stride * 4; + ptrdiff_t row_stride = src_stride * 4; (void)src_width; (void)src_height; if (!filtering) { @@ -375,8 +375,8 @@ static void ScalePlaneDown34(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -385,7 +385,7 @@ static void ScalePlaneDown34(int src_width, uint8_t* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); - const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); @@ -502,8 +502,8 @@ static void ScalePlaneDown34_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -512,7 +512,7 @@ static void ScalePlaneDown34_16(int src_width, uint16_t* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); - const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); @@ -588,8 +588,8 @@ static void ScalePlaneDown38(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -598,7 +598,7 @@ static void ScalePlaneDown38(int src_width, uint8_t* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); - const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; assert(dst_width % 3 == 0); (void)src_width; (void)src_height; @@ -708,8 +708,8 @@ static void ScalePlaneDown38_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -718,7 +718,7 @@ static void ScalePlaneDown38_16(int src_width, uint16_t* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); - const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); @@ -901,8 +901,8 @@ static int ScalePlaneBox(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { int j, k; @@ -967,7 +967,7 @@ static int ScalePlaneBox(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint8_t* src = src_ptr + iy * (int64_t)src_stride; + const uint8_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; @@ -990,8 +990,8 @@ static int ScalePlaneBox_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { int j, k; @@ -1024,7 +1024,7 @@ static int ScalePlaneBox_16(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint16_t* src = src_ptr + iy * (int64_t)src_stride; + const uint16_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; @@ -1048,8 +1048,8 @@ static int ScalePlaneBilinearDown(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t 
src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -1138,7 +1138,7 @@ static int ScalePlaneBilinearDown(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_ptr + yi * (int64_t)src_stride; + const uint8_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1160,8 +1160,8 @@ static int ScalePlaneBilinearDown_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -1229,7 +1229,7 @@ static int ScalePlaneBilinearDown_16(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint16_t* src = src_ptr + yi * (int64_t)src_stride; + const uint16_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1252,8 +1252,8 @@ static int ScalePlaneBilinearUp(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -1339,7 +1339,7 @@ static int ScalePlaneBilinearUp(int src_width, } { int yi = y >> 16; - const uint8_t* src = src_ptr + yi * (int64_t)src_stride; + const uint8_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. 
const int row_size = (dst_width + 31) & ~31; @@ -1348,7 +1348,7 @@ static int ScalePlaneBilinearUp(int src_width, return 1; uint8_t* rowptr = row; - int rowstride = row_size; + ptrdiff_t rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1366,7 +1366,7 @@ static int ScalePlaneBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_ptr + yi * (int64_t)src_stride; + src = src_ptr + yi * src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1401,8 +1401,8 @@ static void ScalePlaneUp2_Linear(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = @@ -1445,13 +1445,13 @@ static void ScalePlaneUp2_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1466,8 +1466,8 @@ static void ScalePlaneUp2_Bilinear(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -1532,8 +1532,8 @@ static void ScalePlaneUp2_12_Linear(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { void (*ScaleRowUp)(const 
uint16_t* src_ptr, uint16_t* dst_ptr, @@ -1565,13 +1565,13 @@ static void ScalePlaneUp2_12_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1587,8 +1587,8 @@ static void ScalePlaneUp2_12_Bilinear(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, @@ -1635,8 +1635,8 @@ static void ScalePlaneUp2_16_Linear(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, @@ -1668,13 +1668,13 @@ static void ScalePlaneUp2_16_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1685,8 +1685,8 @@ static void ScalePlaneUp2_16_Bilinear(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* 
src_ptr, uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, @@ -1733,8 +1733,8 @@ static int ScalePlaneBilinearUp_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -1806,12 +1806,12 @@ static int ScalePlaneBilinearUp_16(int src_width, } { int yi = y >> 16; - const uint16_t* src = src_ptr + yi * (int64_t)src_stride; + const uint16_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int row_size = (dst_width + 31) & ~31; align_buffer_64(row, row_size * 4); - int rowstride = row_size; + ptrdiff_t rowstride = row_size; int lasty = yi; uint16_t* rowptr = (uint16_t*)row; if (!row) @@ -1832,7 +1832,7 @@ static int ScalePlaneBilinearUp_16(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_ptr + yi * (int64_t)src_stride; + src = src_ptr + yi * src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1867,8 +1867,8 @@ static void ScalePlaneSimple(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { int i; @@ -1893,8 +1893,7 @@ static void ScalePlaneSimple(int src_width, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, - dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } @@ -1904,8 +1903,8 @@ static void ScalePlaneSimple_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { int i; @@ -1930,8 +1929,7 @@ static void ScalePlaneSimple_16(int src_width, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, 
src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, - dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } @@ -1956,7 +1954,7 @@ int ScalePlane(const uint8_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (ptrdiff_t)src_stride; src_stride = -src_stride; } // Use specialized scales to improve performance for common resolutions. @@ -2056,7 +2054,7 @@ int ScalePlane_16(const uint16_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (ptrdiff_t)src_stride; src_stride = -src_stride; } // Use specialized scales to improve performance for common resolutions. @@ -2160,7 +2158,7 @@ int ScalePlane_12(const uint16_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (int64_t)src_stride; + src = src + (src_height - 1) * (ptrdiff_t)src_stride; src_stride = -src_stride; } diff --git a/source/scale_argb.cc b/source/scale_argb.cc index eb6caa538..760293d0d 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -38,8 +38,8 @@ static void ScaleARGBDown2(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -48,7 +48,7 @@ static void ScaleARGBDown2(int src_width, int dy, enum FilterMode filtering) { int j; - int row_stride = src_stride * (dy >> 16); + ptrdiff_t row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = filtering == kFilterNone @@ -62,9 +62,9 @@ static void ScaleARGBDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test 
vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; } else { - src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4; + src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; } #if defined(HAS_SCALEARGBROWDOWN2_SSE2) @@ -152,8 +152,8 @@ static int ScaleARGBDown4Box(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -169,12 +169,12 @@ static int ScaleARGBDown4Box(int src_width, align_buffer_64(row, row_size * 2); if (!row) return 1; - int row_stride = src_stride * (dy >> 16); + ptrdiff_t row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. - src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; (void)src_width; (void)src_height; (void)dx; @@ -226,8 +226,8 @@ static void ScaleARGBDownEven(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -237,7 +237,7 @@ static void ScaleARGBDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); + ptrdiff_t row_stride = (dy >> 16) * src_stride; void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, int src_step, uint8_t* dst_argb, int dst_width) = filtering ? 
ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; @@ -245,7 +245,7 @@ static void ScaleARGBDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 @@ -302,8 +302,8 @@ static int ScaleARGBBilinearDown(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -405,7 +405,7 @@ static int ScaleARGBBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_argb + yi * (intptr_t)src_stride; + const uint8_t* src = src_argb + yi * src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -429,8 +429,8 @@ static int ScaleARGBBilinearUp(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -546,7 +546,7 @@ static int ScaleARGBBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_argb + yi * (intptr_t)src_stride; + const uint8_t* src = src_argb + yi * src_stride; // Allocate 2 rows of ARGB. 
const int row_size = (dst_width * 4 + 31) & ~31; @@ -555,7 +555,7 @@ static int ScaleARGBBilinearUp(int src_width, return 1; uint8_t* rowptr = row; - int rowstride = row_size; + ptrdiff_t rowstride = row_size; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -573,7 +573,7 @@ static int ScaleARGBBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_argb + yi * (intptr_t)src_stride; + src = src_argb + yi * src_stride; } if (yi != lasty) { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -599,275 +599,6 @@ static int ScaleARGBBilinearUp(int src_width, return 0; } -#ifdef YUVSCALEUP -// Scale YUV to ARGB up with bilinear interpolation. -static int ScaleYUVToARGBBilinearUp(int src_width, - int src_height, - int dst_width, - int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - int x, - int dx, - int y, - int dy, - enum FilterMode filtering) { - int j; - void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, - const uint8_t* v_buf, uint8_t* rgb_buf, int width) = - I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(src_width, 8)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(src_width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) == - (kCpuHasAVX512BW | kCpuHasAVX512VL)) { - I422ToARGBRow = I422ToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(src_width, 32)) { - I422ToARGBRow = I422ToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if 
(IS_ALIGNED(src_width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - I422ToARGBRow = I422ToARGBRow_SVE2; - } -#endif -#if defined(HAS_I422TOARGBROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - I422ToARGBRow = I422ToARGBRow_SME; - } -#endif -#if defined(HAS_I422TOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - I422ToARGBRow = I422ToARGBRow_Any_LSX; - if (IS_ALIGNED(src_width, 16)) { - I422ToARGBRow = I422ToARGBRow_LSX; - } - } -#endif -#if defined(HAS_I422TOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - I422ToARGBRow = I422ToARGBRow_Any_LASX; - if (IS_ALIGNED(src_width, 32)) { - I422ToARGBRow = I422ToARGBRow_LASX; - } - } -#endif -#if defined(HAS_I422TOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - I422ToARGBRow = I422ToARGBRow_RVV; - } -#endif - - void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) = InterpolateRow_C; -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - InterpolateRow = InterpolateRow_SME; - } -#endif -#if defined(HAS_INTERPOLATEROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - InterpolateRow = InterpolateRow_Any_LSX; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_LSX; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - InterpolateRow = InterpolateRow_RVV; - } -#endif - - void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb, - int dst_width, int x, int dx) = - filtering ? 
ScaleARGBFilterCols_C : ScaleARGBCols_C; - if (src_width >= 32768) { - ScaleARGBFilterCols = - filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C; - } -#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_NEON) - if (filtering && TestCpuFlag(kCpuHasNEON)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; - } - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_LSX) - if (filtering && TestCpuFlag(kCpuHasLSX)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_LSX; - } - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_RVV) - if (filtering && TestCpuFlag(kCpuHasRVV)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_RVV; - } -#endif -#if defined(HAS_SCALEARGBCOLS_SSE2) - if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBCols_SSE2; - } -#endif -#if defined(HAS_SCALEARGBCOLS_NEON) - if (!filtering && TestCpuFlag(kCpuHasNEON)) { - ScaleARGBFilterCols = ScaleARGBCols_Any_NEON; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBCols_NEON; - } - } -#endif -#if defined(HAS_SCALEARGBCOLS_LSX) - if (!filtering && TestCpuFlag(kCpuHasLSX)) { - ScaleARGBFilterCols = ScaleARGBCols_Any_LSX; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBFilterCols = ScaleARGBCols_LSX; - } - } -#endif - if (!filtering && src_width * 2 == dst_width && x < 0x8000) { - ScaleARGBFilterCols = ScaleARGBColsUp2_C; -#if defined(HAS_SCALEARGBCOLSUP2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; - } -#endif - } - - const int max_y = (src_height - 1) << 16; - if (y > max_y) { - y = max_y; - } - const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV 
coordinate. - int yi = y >> 16; - int uv_yi = yi >> kYShift; - const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y; - const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; - const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; - - // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB - // scaled horizontally to the destination width. - const int row_size = (dst_width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2 + src_width * 4); - - uint8_t* argb_row = row + row_size * 2; - uint8_t* rowptr = row; - int rowstride = row_size; - int lasty = yi; - if (!row) - return 1; - - // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. - ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); - if (src_height > 1) { - src_row_y += src_stride_y; - if (yi & 1) { - src_row_u += src_stride_u; - src_row_v += src_stride_v; - } - } - ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx); - if (src_height > 2) { - src_row_y += src_stride_y; - if (!(yi & 1)) { - src_row_u += src_stride_u; - src_row_v += src_stride_v; - } - } - - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lasty) { - if (y > max_y) { - y = max_y; - yi = y >> 16; - uv_yi = yi >> kYShift; - src_row_y = src_y + yi * (intptr_t)src_stride_y; - src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; - src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; - } - if (yi != lasty) { - // TODO(fbarchard): Convert the clipped region of row. 
- I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width); - ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx); - rowptr += rowstride; - rowstride = -rowstride; - lasty = yi; - src_row_y += src_stride_y; - if (yi & 1) { - src_row_u += src_stride_u; - src_row_v += src_stride_v; - } - } - } - if (filtering == kFilterLinear) { - InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); - } - dst_argb += dst_stride_argb; - y += dy; - } - free_aligned_buffer_64(row); - return 0; -} -#endif - // Scale ARGB to/from any dimensions, without interpolation. // Fixed point math is used for performance: The upper 16 bits // of x and dx is the integer part of the source position and @@ -877,8 +608,8 @@ static void ScaleARGBSimple(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -921,8 +652,8 @@ static void ScaleARGBSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride, - dst_width, x, dx); + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, + dx); dst_argb += dst_stride; y += dy; } @@ -957,7 +688,7 @@ static int ScaleARGB(const uint8_t* src, // Negative src_height means invert the image. 
if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (intptr_t)src_stride; + src = src + (src_height - 1) * (ptrdiff_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -972,8 +703,8 @@ static int ScaleARGB(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (intptr_t)src_stride; - dst += clip_y * dst_stride; + src += (clipf >> 16) * (ptrdiff_t)src_stride; + dst += clip_y * (ptrdiff_t)dst_stride; } // Special case for integer step values. @@ -1006,7 +737,7 @@ static int ScaleARGB(const uint8_t* src, filtering = kFilterNone; if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4, + ARGBCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); return 0; } diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 35ad55dda..95ac5307f 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -11,6 +11,7 @@ #include "libyuv/scale_uv.h" #include +#include #include #include "libyuv/cpu_id.h" @@ -59,8 +60,8 @@ static void ScaleUVDown2(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -69,7 +70,7 @@ static void ScaleUVDown2(int src_width, int dy, enum FilterMode filtering) { int j; - int row_stride = src_stride * (dy >> 16); + ptrdiff_t row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = filtering == kFilterNone @@ -83,9 +84,9 @@ static void ScaleUVDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. 
if (filtering == kFilterBilinear) { - src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * src_stride + (x >> 16) * 2; } else { - src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2; + src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2; } #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) @@ -174,8 +175,8 @@ static int ScaleUVDown4Box(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -188,12 +189,12 @@ static int ScaleUVDown4Box(int src_width, align_buffer_64(row, row_size * 2); if (!row) return 1; - int row_stride = src_stride * (dy >> 16); + ptrdiff_t row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = ScaleUVRowDown2Box_C; // Advance to odd row, even column. - src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * src_stride + (x >> 16) * 2; (void)src_width; (void)src_height; (void)dx; @@ -256,8 +257,8 @@ static void ScaleUVDownEven(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -267,7 +268,7 @@ static void ScaleUVDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); + ptrdiff_t row_stride = (dy >> 16) * src_stride; void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride, int src_step, uint8_t* dst_uv, int dst_width) = filtering ? 
ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C; @@ -275,7 +276,7 @@ static void ScaleUVDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * src_stride + (x >> 16) * 2; #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3 @@ -334,8 +335,8 @@ static int ScaleUVBilinearDown(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -422,7 +423,7 @@ static int ScaleUVBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_uv + yi * (intptr_t)src_stride; + const uint8_t* src = src_uv + yi * src_stride; if (filtering == kFilterLinear) { ScaleUVFilterCols(dst_uv, src, dst_width, x, dx); } else { @@ -448,8 +449,8 @@ static int ScaleUVBilinearUp(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -543,7 +544,7 @@ static int ScaleUVBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_uv + yi * (intptr_t)src_stride; + const uint8_t* src = src_uv + yi * src_stride; // Allocate 2 rows of UV. 
const int row_size = (dst_width * 2 + 15) & ~15; @@ -552,7 +553,7 @@ static int ScaleUVBilinearUp(int src_width, return 1; uint8_t* rowptr = row; - int rowstride = row_size; + ptrdiff_t rowstride = row_size; int lasty = yi; ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -570,7 +571,7 @@ static int ScaleUVBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_uv + yi * (intptr_t)src_stride; + src = src_uv + yi * src_stride; } if (yi != lasty) { ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -606,8 +607,8 @@ static void ScaleUVLinearUp2(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv) { void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) = @@ -645,13 +646,12 @@ static void ScaleUVLinearUp2(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, - dst_width); + ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -727,8 +727,8 @@ static void ScaleUVLinearUp2_16(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint16_t* src_uv, uint16_t* dst_uv) { void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = @@ -760,13 +760,12 @@ static void ScaleUVLinearUp2_16(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, - dst_width); + ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width); } else { dy = 
FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -836,8 +835,8 @@ static void ScaleUVSimple(int src_width, int src_height, int dst_width, int dst_height, - int src_stride, - int dst_stride, + ptrdiff_t src_stride, + ptrdiff_t dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -872,8 +871,7 @@ static void ScaleUVSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x, - dx); + ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx); dst_uv += dst_stride; y += dy; } @@ -893,7 +891,7 @@ static int UVCopy(const uint8_t* src_uv, // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -913,7 +911,7 @@ static int UVCopy_16(const uint16_t* src_uv, // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -951,7 +949,7 @@ static int ScaleUV(const uint8_t* src, // Negative src_height means invert the image. 
if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (intptr_t)src_stride; + src = src + (src_height - 1) * (ptrdiff_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -966,8 +964,8 @@ static int ScaleUV(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (intptr_t)src_stride; - dst += clip_y * dst_stride; + src += (clipf >> 16) * (ptrdiff_t)src_stride; + dst += clip_y * (ptrdiff_t)dst_stride; } // Special case for integer step values. @@ -1007,7 +1005,7 @@ static int ScaleUV(const uint8_t* src, #ifdef HAS_UVCOPY if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2, + UVCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 2, src_stride, dst, dst_stride, clip_width, clip_height); return 0; } @@ -1100,7 +1098,7 @@ int UVScale_16(const uint16_t* src_uv, // Negative src_height means invert the image. 
if (src_height < 0) { src_height = -src_height; - src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv; + src_uv = src_uv + (src_height - 1) * (ptrdiff_t)src_stride_uv; src_stride_uv = -src_stride_uv; } src_width = Abs(src_width); @@ -1108,13 +1106,16 @@ int UVScale_16(const uint16_t* src_uv, #ifdef HAS_UVCOPY if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) { if (dst_height == 1) { - UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv, + UVCopy_16(src_uv + ((src_height - 1) / 2) * (ptrdiff_t)src_stride_uv, src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height); } else { dy = src_height / dst_height; - UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv, - (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv, - dst_width, dst_height); + if (src_stride_uv > INT_MAX / dy) { + return -1; + } + UVCopy_16(src_uv + ((dy - 1) / 2) * (ptrdiff_t)src_stride_uv, + dy * src_stride_uv, dst_uv, dst_stride_uv, dst_width, + dst_height); } return 0; diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc index b952a6f73..7d38c4d18 100644 --- a/unit_test/scale_plane_test.cc +++ b/unit_test/scale_plane_test.cc @@ -42,6 +42,108 @@ namespace libyuv { +// POC: int row_stride = src_stride * 2 overflows to a small negative value +// when src_stride is close to INT_MAX, causing src_ptr to walk backward +// past the start of the source allocation on the second loop iteration. +// With src_stride = 0x7FFFFFFE, row_stride = (int)0xFFFFFFFC = -4, so on +// y=1 ScaleRowDown2Box reads 4 bytes before the heap allocation. +TEST_F(LibYUVScaleTest, ScalePlaneDown2_RowStrideOverflow) { + constexpr int kSrcStride = 0x7FFFFFFE; // INT_MAX - 1 + constexpr int kSrcW = 64; + constexpr int kSrcH = 4; + constexpr int kDstW = 32; + constexpr int kDstH = 2; + // src_size = (kSrcH - 1) * stride + width. 
+ size_t src_size = kSrcH - 1; + if (src_size > SIZE_MAX / kSrcStride) { + GTEST_SKIP() << "could not represent allocation size in size_t"; + } + src_size *= kSrcStride; + if (src_size > SIZE_MAX - kSrcW) { + GTEST_SKIP() << "could not represent allocation size in size_t"; + } + src_size += kSrcW; + +#if defined(__aarch64__) + // Infer malloc can accept a large size for cpu with dot product (a76/a55) + int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd); +#else + int has_large_malloc = 1; +#endif + if (!has_large_malloc) { + GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes"; + } + + uint8_t* src = new (std::nothrow) uint8_t[src_size]; + if (!src) { + GTEST_SKIP() << "could not allocate " << src_size << " bytes"; + } + uint8_t dst[kDstW * kDstH]; + uint8_t* src_row = src; + for (int i = 0; i < kSrcH; i++) { + memset(src_row, 0x41, kSrcW); + src_row += kSrcStride; + } + // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not + // instrument, so they silently read OOB without a report. + MaskCpuFlags(1); + // 2*dst == src on both axes -> ScalePlane dispatches to ScalePlaneDown2. + // int row_stride = kSrcStride * 2 wraps to -4; on y=1 src_ptr underflows. + ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH, + kFilterBox); + MaskCpuFlags(0); + delete[] src; +} + +// POC: same defect in the 1/4 fast path. src_stride = 0x3FFFFFFF gives +// int row_stride = src_stride * 4 = (int)0xFFFFFFFC = -4. +TEST_F(LibYUVScaleTest, ScalePlaneDown4_RowStrideOverflow) { + constexpr int kSrcStride = 0x3FFFFFFF; // INT_MAX / 4 (rounded down) + constexpr int kSrcW = 64; + constexpr int kSrcH = 8; + constexpr int kDstW = 16; + constexpr int kDstH = 2; + // src_size = (kSrcH - 1) * stride + width. 
+ size_t src_size = kSrcH - 1; + if (src_size > SIZE_MAX / kSrcStride) { + GTEST_SKIP() << "could not represent allocation size in size_t"; + } + src_size *= kSrcStride; + if (src_size > SIZE_MAX - kSrcW) { + GTEST_SKIP() << "could not represent allocation size in size_t"; + } + src_size += kSrcW; + +#if defined(__aarch64__) + // Infer malloc can accept a large size for cpu with dot product (a76/a55) + int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd); +#else + int has_large_malloc = 1; +#endif + if (!has_large_malloc) { + GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes"; + } + + uint8_t* src = new (std::nothrow) uint8_t[src_size]; + if (!src) { + GTEST_SKIP() << "could not allocate " << src_size << " bytes"; + } + uint8_t dst[kDstW * kDstH]; + uint8_t* src_row = src; + for (int i = 0; i < kSrcH; i++) { + memset(src_row, 0x41, kSrcW); + src_row += kSrcStride; + } + // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not + // instrument, so they silently read OOB without a report. + MaskCpuFlags(1); + // 4*dst == src on both axes with kFilterBox -> ScalePlaneDown4. + ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH, + kFilterBox); + MaskCpuFlags(0); + delete[] src; +} + #ifdef ENABLE_ROW_TESTS #ifdef HAS_SCALEROWDOWN2_SSSE3 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {