Fix integer overflow in multiplications of stride

Audit all occurrences of "stride *" in the libyuv source tree. Ensure that these multiplications are performed in the ptrdiff_t type.

For functions not declared in a public header (such as static functions), prefer to declare the stride parameters (typically named src_stride and dst_stride) and related stride local variables as ptrdiff_t. If this is not possible, add ptrdiff_t casts to the stride parameters in multiplications.

If intptr_t or int64_t casts were used, change them to ptrdiff_t casts.

Bug: chromium:516986556
Change-Id: I6cd8a8eb00cbb5380db828bf83e4d89ff95891f3
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7882967
Commit-Queue: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Frank Barchard <fbarchard@google.com>
2026-07-30 16:26:19 +08:00 · 2026-05-28 13:56:44 -07:00 · 2026-05-28 13:56:44 -07:00 · ebe6fef903
commit ebe6fef903
parent de63bd90f4
11 changed files with 308 additions and 472 deletions
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@ -10,6 +10,8 @@

 #include "libyuv/convert_from.h"

+#include <stddef.h>
+
 #include "libyuv/basic_types.h"
 #include "libyuv/convert.h"  // For I420Copy
 #include "libyuv/cpu_id.h"
@ -782,7 +784,7 @@ int ConvertFromI420(const uint8_t* y,
      break;
    case FOURCC_NV12: {
      int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
-      uint8_t* dst_uv = dst_sample + dst_y_stride * height;
+      uint8_t* dst_uv = dst_sample + (ptrdiff_t)dst_y_stride * height;
      r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width, dst_uv,
                     dst_sample_stride ? dst_sample_stride : width, width,
@ -791,7 +793,7 @@ int ConvertFromI420(const uint8_t* y,
    }
    case FOURCC_NV21: {
      int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
-      uint8_t* dst_vu = dst_sample + dst_y_stride * height;
+      uint8_t* dst_vu = dst_sample + (ptrdiff_t)dst_y_stride * height;
      r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride ? dst_sample_stride : width, dst_vu,
                     dst_sample_stride ? dst_sample_stride : width, width,
@ -807,11 +809,11 @@ int ConvertFromI420(const uint8_t* y,
      uint8_t* dst_u;
      uint8_t* dst_v;
      if (format == FOURCC_YV12) {
-        dst_v = dst_sample + dst_sample_stride * height;
-        dst_u = dst_v + halfstride * halfheight;
+        dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_u = dst_v + (ptrdiff_t)halfstride * halfheight;
      } else {
-        dst_u = dst_sample + dst_sample_stride * height;
-        dst_v = dst_u + halfstride * halfheight;
+        dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_v = dst_u + (ptrdiff_t)halfstride * halfheight;
      }
      r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                   dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
@ -825,11 +827,11 @@ int ConvertFromI420(const uint8_t* y,
      uint8_t* dst_u;
      uint8_t* dst_v;
      if (format == FOURCC_YV16) {
-        dst_v = dst_sample + dst_sample_stride * height;
-        dst_u = dst_v + halfstride * height;
+        dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_u = dst_v + (ptrdiff_t)halfstride * height;
      } else {
-        dst_u = dst_sample + dst_sample_stride * height;
-        dst_v = dst_u + halfstride * height;
+        dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_v = dst_u + (ptrdiff_t)halfstride * height;
      }
      r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
@ -842,11 +844,11 @@ int ConvertFromI420(const uint8_t* y,
      uint8_t* dst_u;
      uint8_t* dst_v;
      if (format == FOURCC_YV24) {
-        dst_v = dst_sample + dst_sample_stride * height;
-        dst_u = dst_v + dst_sample_stride * height;
+        dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_u = dst_v + (ptrdiff_t)dst_sample_stride * height;
      } else {
-        dst_u = dst_sample + dst_sample_stride * height;
-        dst_v = dst_u + dst_sample_stride * height;
+        dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_v = dst_u + (ptrdiff_t)dst_sample_stride * height;
      }
      r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                     dst_sample_stride, dst_u, dst_sample_stride, dst_v,
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@ -3088,7 +3088,7 @@ int I420Blend(const uint8_t* src_y0,
    }
    // Subsample 2 rows of UV to half width and half height.
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
-    alpha += alpha_stride * 2;
+    alpha += (ptrdiff_t)alpha_stride * 2;
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
    src_u0 += src_stride_u0;
--- a/source/rotate.cc
+++ b/source/rotate.cc
@ -128,7 +128,7 @@ void RotatePlane90(const uint8_t* src,
  // Rotate by 90 is a transpose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
+  src += (ptrdiff_t)src_stride * (height - 1);
  src_stride = -src_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
 }
@ -143,7 +143,7 @@ void RotatePlane270(const uint8_t* src,
  // Rotate by 270 is a transpose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
+  dst += (ptrdiff_t)dst_stride * (width - 1);
  dst_stride = -dst_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
 }
@ -160,8 +160,8 @@ void RotatePlane180(const uint8_t* src,
  assert(row);
  if (!row)
    return;
-  const uint8_t* src_bot = src + src_stride * (height - 1);
-  uint8_t* dst_bot = dst + dst_stride * (height - 1);
+  const uint8_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1);
+  uint8_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;
  void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
@ -354,7 +354,7 @@ void SplitRotateUV90(const uint8_t* src,
                     int dst_stride_b,
                     int width,
                     int height) {
-  src += src_stride * (height - 1);
+  src += (ptrdiff_t)src_stride * (height - 1);
  src_stride = -src_stride;

  SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
@ -533,7 +533,7 @@ static void RotatePlane90_16(const uint16_t* src,
  // Rotate by 90 is a transpose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
+  src += (ptrdiff_t)src_stride * (height - 1);
  src_stride = -src_stride;
  TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
 }
@ -547,7 +547,7 @@ static void RotatePlane270_16(const uint16_t* src,
  // Rotate by 270 is a transpose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
+  dst += (ptrdiff_t)dst_stride * (width - 1);
  dst_stride = -dst_stride;
  TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
 }
@ -558,8 +558,8 @@ static void RotatePlane180_16(const uint16_t* src,
                              int dst_stride,
                              int width,
                              int height) {
-  const uint16_t* src_bot = src + src_stride * (height - 1);
-  uint16_t* dst_bot = dst + dst_stride * (height - 1);
+  const uint16_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1);
+  uint16_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;

--- a/source/rotate_common.cc
+++ b/source/rotate_common.cc
@ -8,6 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <stddef.h>
+
 #include "libyuv/rotate_row.h"
 #include "libyuv/row.h"

@ -191,10 +193,10 @@ void Transpose4x4_32_C(const uint8_t* src,
    ((uint32_t*)(dst3))[1] = p31;
    ((uint32_t*)(dst3))[2] = p32;
    ((uint32_t*)(dst3))[3] = p33;
-    src += src_stride * 4;  // advance 4 rows
-    src1 += src_stride * 4;
-    src2 += src_stride * 4;
-    src3 += src_stride * 4;
+    src += (ptrdiff_t)src_stride * 4;  // advance 4 rows
+    src1 += (ptrdiff_t)src_stride * 4;
+    src2 += (ptrdiff_t)src_stride * 4;
+    src3 += (ptrdiff_t)src_stride * 4;
    dst += 4 * 4;  // advance 4 columns
    dst1 += 4 * 4;
    dst2 += 4 * 4;
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@ -198,16 +198,16 @@ void Transpose4x4_32_NEON(const uint8_t* src,
      "vst1.8      {q3}, [%7]!                   \n"
      "bgt         1b                            \n"

-      : "+r"(src),                        // %0
-        "+r"(src1),                       // %1
-        "+r"(src2),                       // %2
-        "+r"(src3),                       // %3
-        "+r"(dst),                        // %4
-        "+r"(dst1),                       // %5
-        "+r"(dst2),                       // %6
-        "+r"(dst3),                       // %7
-        "+r"(width)                       // %8
-      : "r"((ptrdiff_t)(src_stride * 4))  // %9
+      : "+r"(src),                      // %0
+        "+r"(src1),                     // %1
+        "+r"(src2),                     // %2
+        "+r"(src3),                     // %3
+        "+r"(dst),                      // %4
+        "+r"(dst1),                     // %5
+        "+r"(dst2),                     // %6
+        "+r"(dst3),                     // %7
+        "+r"(width)                     // %8
+      : "r"((ptrdiff_t)src_stride * 4)  // %9
      : "memory", "cc", "q0", "q1", "q2", "q3");
 }

--- a/source/rotate_neon64.cc
+++ b/source/rotate_neon64.cc
@ -252,16 +252,16 @@ void Transpose4x4_32_NEON(const uint8_t* src,
      "st1         {v2.4s}, [%6], 16             \n"
      "st1         {v3.4s}, [%7], 16             \n"
      "b.gt        1b                            \n"
-      : "+r"(src),                        // %0
-        "+r"(src1),                       // %1
-        "+r"(src2),                       // %2
-        "+r"(src3),                       // %3
-        "+r"(dst),                        // %4
-        "+r"(dst1),                       // %5
-        "+r"(dst2),                       // %6
-        "+r"(dst3),                       // %7
-        "+r"(width)                       // %8
-      : "r"((ptrdiff_t)(src_stride * 4))  // %9
+      : "+r"(src),                      // %0
+        "+r"(src1),                     // %1
+        "+r"(src2),                     // %2
+        "+r"(src3),                     // %3
+        "+r"(dst),                      // %4
+        "+r"(dst1),                     // %5
+        "+r"(dst2),                     // %6
+        "+r"(dst3),                     // %7
+        "+r"(width)                     // %8
+      : "r"((ptrdiff_t)src_stride * 4)  // %9
      : "memory", "cc", "v0", "v1", "v2", "v3");
 }

--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@ -4751,10 +4751,10 @@ void ConvertFP16ToFP32Column_NEON(const uint16_t* src,  // fp16
      "str         s2, [%1], #4                  \n"  // store 1 floats
      "b.gt        2b                            \n"
      "3:          \n"
-      : "+r"(src),                        // %0
-        "+r"(dst),                        // %1
-        "+r"(width)                       // %2
-      : "r"((ptrdiff_t)(src_stride * 2))  // %3
+      : "+r"(src),                      // %0
+        "+r"(dst),                      // %1
+        "+r"(width)                     // %2
+      : "r"((ptrdiff_t)src_stride * 2)  // %3
      : "cc", "memory", "v0", "v1", "v2", "v3");
 }

--- a/source/scale.cc
+++ b/source/scale.cc
@ -39,8 +39,8 @@ static void ScalePlaneDown2(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
@ -51,7 +51,7 @@ static void ScalePlaneDown2(int src_width,
          ? ScaleRowDown2_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
                                        : ScaleRowDown2Box_C);
-  int row_stride = src_stride * 2;
+  ptrdiff_t row_stride = src_stride * 2;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
@ -151,8 +151,8 @@ static void ScalePlaneDown2_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
@ -163,7 +163,7 @@ static void ScalePlaneDown2_16(int src_width,
          ? ScaleRowDown2_16_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
                                        : ScaleRowDown2Box_16_C);
-  int row_stride = src_stride * 2;
+  ptrdiff_t row_stride = src_stride * 2;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
@ -228,7 +228,7 @@ void ScalePlaneDown2_16To8(int src_width,
                 ? ScaleRowDown2_16To8_C
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
                                               : ScaleRowDown2Box_16To8_C));
-  int row_stride = src_stride * 2;
+  ptrdiff_t row_stride = (ptrdiff_t)src_stride * 2;
  (void)dst_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
@ -259,8 +259,8 @@ static void ScalePlaneDown4(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
@ -268,7 +268,7 @@ static void ScalePlaneDown4(int src_width,
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
-  int row_stride = src_stride * 4;
+  ptrdiff_t row_stride = src_stride * 4;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
@ -331,8 +331,8 @@ static void ScalePlaneDown4_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
@ -340,7 +340,7 @@ static void ScalePlaneDown4_16(int src_width,
  void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint16_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
-  int row_stride = src_stride * 4;
+  ptrdiff_t row_stride = src_stride * 4;
  (void)src_width;
  (void)src_height;
  if (!filtering) {
@ -375,8 +375,8 @@ static void ScalePlaneDown34(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
@ -385,7 +385,7 @@ static void ScalePlaneDown34(int src_width,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
@ -502,8 +502,8 @@ static void ScalePlaneDown34_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
@ -512,7 +512,7 @@ static void ScalePlaneDown34_16(int src_width,
                           uint16_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
@ -588,8 +588,8 @@ static void ScalePlaneDown38(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
@ -598,7 +598,7 @@ static void ScalePlaneDown38(int src_width,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  (void)src_width;
  (void)src_height;
@ -708,8 +708,8 @@ static void ScalePlaneDown38_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
@ -718,7 +718,7 @@ static void ScalePlaneDown38_16(int src_width,
                           uint16_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
@ -901,8 +901,8 @@ static int ScalePlaneBox(int src_width,
                         int src_height,
                         int dst_width,
                         int dst_height,
-                         int src_stride,
-                         int dst_stride,
+                         ptrdiff_t src_stride,
+                         ptrdiff_t dst_stride,
                         const uint8_t* src_ptr,
                         uint8_t* dst_ptr) {
  int j, k;
@ -967,7 +967,7 @@ static int ScalePlaneBox(int src_width,
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
-      const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
+      const uint8_t* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
@ -990,8 +990,8 @@ static int ScalePlaneBox_16(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                            const uint16_t* src_ptr,
                            uint16_t* dst_ptr) {
  int j, k;
@ -1024,7 +1024,7 @@ static int ScalePlaneBox_16(int src_width,
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
-      const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
+      const uint16_t* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
@ -1048,8 +1048,8 @@ static int ScalePlaneBilinearDown(int src_width,
                                  int src_height,
                                  int dst_width,
                                  int dst_height,
-                                  int src_stride,
-                                  int dst_stride,
+                                  ptrdiff_t src_stride,
+                                  ptrdiff_t dst_stride,
                                  const uint8_t* src_ptr,
                                  uint8_t* dst_ptr,
                                  enum FilterMode filtering) {
@ -1138,7 +1138,7 @@ static int ScalePlaneBilinearDown(int src_width,

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
-    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint8_t* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
@ -1160,8 +1160,8 @@ static int ScalePlaneBilinearDown_16(int src_width,
                                     int src_height,
                                     int dst_width,
                                     int dst_height,
-                                     int src_stride,
-                                     int dst_stride,
+                                     ptrdiff_t src_stride,
+                                     ptrdiff_t dst_stride,
                                     const uint16_t* src_ptr,
                                     uint16_t* dst_ptr,
                                     enum FilterMode filtering) {
@ -1229,7 +1229,7 @@ static int ScalePlaneBilinearDown_16(int src_width,

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
-    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint16_t* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
@ -1252,8 +1252,8 @@ static int ScalePlaneBilinearUp(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                const uint8_t* src_ptr,
                                uint8_t* dst_ptr,
                                enum FilterMode filtering) {
@ -1339,7 +1339,7 @@ static int ScalePlaneBilinearUp(int src_width,
  }
  {
    int yi = y >> 16;
-    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint8_t* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    const int row_size = (dst_width + 31) & ~31;
@ -1348,7 +1348,7 @@ static int ScalePlaneBilinearUp(int src_width,
      return 1;

    uint8_t* rowptr = row;
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
    int lasty = yi;

    ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1366,7 +1366,7 @@ static int ScalePlaneBilinearUp(int src_width,
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
-          src = src_ptr + yi * (int64_t)src_stride;
+          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1401,8 +1401,8 @@ static void ScalePlaneUp2_Linear(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                 int src_stride,
-                                 int dst_stride,
+                                 ptrdiff_t src_stride,
+                                 ptrdiff_t dst_stride,
                                 const uint8_t* src_ptr,
                                 uint8_t* dst_ptr) {
  void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
@ -1445,13 +1445,13 @@ static void ScalePlaneUp2_Linear(int src_width,
 #endif

  if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
               dst_width);
  } else {
    dy = FixedDiv(src_height - 1, dst_height - 1);
    y = (1 << 15) - 1;
    for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
      dst_ptr += dst_stride;
      y += dy;
    }
@ -1466,8 +1466,8 @@ static void ScalePlaneUp2_Bilinear(int src_width,
                                   int src_height,
                                   int dst_width,
                                   int dst_height,
-                                   int src_stride,
-                                   int dst_stride,
+                                   ptrdiff_t src_stride,
+                                   ptrdiff_t dst_stride,
                                   const uint8_t* src_ptr,
                                   uint8_t* dst_ptr) {
  void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
@ -1532,8 +1532,8 @@ static void ScalePlaneUp2_12_Linear(int src_width,
                                    int src_height,
                                    int dst_width,
                                    int dst_height,
-                                    int src_stride,
-                                    int dst_stride,
+                                    ptrdiff_t src_stride,
+                                    ptrdiff_t dst_stride,
                                    const uint16_t* src_ptr,
                                    uint16_t* dst_ptr) {
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
@ -1565,13 +1565,13 @@ static void ScalePlaneUp2_12_Linear(int src_width,
 #endif

  if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
               dst_width);
  } else {
    dy = FixedDiv(src_height - 1, dst_height - 1);
    y = (1 << 15) - 1;
    for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
      dst_ptr += dst_stride;
      y += dy;
    }
@ -1587,8 +1587,8 @@ static void ScalePlaneUp2_12_Bilinear(int src_width,
                                      int src_height,
                                      int dst_width,
                                      int dst_height,
-                                      int src_stride,
-                                      int dst_stride,
+                                      ptrdiff_t src_stride,
+                                      ptrdiff_t dst_stride,
                                      const uint16_t* src_ptr,
                                      uint16_t* dst_ptr) {
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
@ -1635,8 +1635,8 @@ static void ScalePlaneUp2_16_Linear(int src_width,
                                    int src_height,
                                    int dst_width,
                                    int dst_height,
-                                    int src_stride,
-                                    int dst_stride,
+                                    ptrdiff_t src_stride,
+                                    ptrdiff_t dst_stride,
                                    const uint16_t* src_ptr,
                                    uint16_t* dst_ptr) {
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
@ -1668,13 +1668,13 @@ static void ScalePlaneUp2_16_Linear(int src_width,
 #endif

  if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
               dst_width);
  } else {
    dy = FixedDiv(src_height - 1, dst_height - 1);
    y = (1 << 15) - 1;
    for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
      dst_ptr += dst_stride;
      y += dy;
    }
@ -1685,8 +1685,8 @@ static void ScalePlaneUp2_16_Bilinear(int src_width,
                                      int src_height,
                                      int dst_width,
                                      int dst_height,
-                                      int src_stride,
-                                      int dst_stride,
+                                      ptrdiff_t src_stride,
+                                      ptrdiff_t dst_stride,
                                      const uint16_t* src_ptr,
                                      uint16_t* dst_ptr) {
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
@ -1733,8 +1733,8 @@ static int ScalePlaneBilinearUp_16(int src_width,
                                   int src_height,
                                   int dst_width,
                                   int dst_height,
-                                   int src_stride,
-                                   int dst_stride,
+                                   ptrdiff_t src_stride,
+                                   ptrdiff_t dst_stride,
                                   const uint16_t* src_ptr,
                                   uint16_t* dst_ptr,
                                   enum FilterMode filtering) {
@ -1806,12 +1806,12 @@ static int ScalePlaneBilinearUp_16(int src_width,
  }
  {
    int yi = y >> 16;
-    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint16_t* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    const int row_size = (dst_width + 31) & ~31;
    align_buffer_64(row, row_size * 4);
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
    int lasty = yi;
    uint16_t* rowptr = (uint16_t*)row;
    if (!row)
@ -1832,7 +1832,7 @@ static int ScalePlaneBilinearUp_16(int src_width,
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
-          src = src_ptr + yi * (int64_t)src_stride;
+          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1867,8 +1867,8 @@ static void ScalePlaneSimple(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr) {
  int i;
@ -1893,8 +1893,7 @@ static void ScalePlaneSimple(int src_width,
  }

  for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
-              dx);
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
@ -1904,8 +1903,8 @@ static void ScalePlaneSimple_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr) {
  int i;
@ -1930,8 +1929,7 @@ static void ScalePlaneSimple_16(int src_width,
  }

  for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
-              dx);
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
@ -1956,7 +1954,7 @@ int ScalePlane(const uint8_t* src,
  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
    src_stride = -src_stride;
  }
  // Use specialized scales to improve performance for common resolutions.
@ -2056,7 +2054,7 @@ int ScalePlane_16(const uint16_t* src,
  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
    src_stride = -src_stride;
  }
  // Use specialized scales to improve performance for common resolutions.
@ -2160,7 +2158,7 @@ int ScalePlane_12(const uint16_t* src,
  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
    src_stride = -src_stride;
  }

--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@ -38,8 +38,8 @@ static void ScaleARGBDown2(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
-                           int src_stride,
-                           int dst_stride,
+                           ptrdiff_t src_stride,
+                           ptrdiff_t dst_stride,
                           const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int x,
@ -48,7 +48,7 @@ static void ScaleARGBDown2(int src_width,
                           int dy,
                           enum FilterMode filtering) {
  int j;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                            uint8_t* dst_argb, int dst_width) =
      filtering == kFilterNone
@ -62,9 +62,9 @@ static void ScaleARGBDown2(int src_width,
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
-    src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
+    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  } else {
-    src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
+    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  }

 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@ -152,8 +152,8 @@ static int ScaleARGBDown4Box(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                             const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int x,
@ -169,12 +169,12 @@ static int ScaleARGBDown4Box(int src_width,
  align_buffer_64(row, row_size * 2);
  if (!row)
    return 1;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                            uint8_t* dst_argb, int dst_width) =
      ScaleARGBRowDown2Box_C;
  // Advance to odd row, even column.
-  src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  (void)src_width;
  (void)src_height;
  (void)dx;
@ -226,8 +226,8 @@ static void ScaleARGBDownEven(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                              int src_stride,
-                              int dst_stride,
+                              ptrdiff_t src_stride,
+                              ptrdiff_t dst_stride,
                              const uint8_t* src_argb,
                              uint8_t* dst_argb,
                              int x,
@ -237,7 +237,7 @@ static void ScaleARGBDownEven(int src_width,
                              enum FilterMode filtering) {
  int j;
  int col_step = dx >> 16;
-  ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
+  ptrdiff_t row_stride = (dy >> 16) * src_stride;
  void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
                               int src_step, uint8_t* dst_argb, int dst_width) =
      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@ -245,7 +245,7 @@ static void ScaleARGBDownEven(int src_width,
  (void)src_height;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
-  src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@ -302,8 +302,8 @@ static int ScaleARGBBilinearDown(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                 int src_stride,
-                                 int dst_stride,
+                                 ptrdiff_t src_stride,
+                                 ptrdiff_t dst_stride,
                                 const uint8_t* src_argb,
                                 uint8_t* dst_argb,
                                 int x,
@ -405,7 +405,7 @@ static int ScaleARGBBilinearDown(int src_width,
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
-      const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
+      const uint8_t* src = src_argb + yi * src_stride;
      if (filtering == kFilterLinear) {
        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
      } else {
@ -429,8 +429,8 @@ static int ScaleARGBBilinearUp(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                               const uint8_t* src_argb,
                               uint8_t* dst_argb,
                               int x,
@ -546,7 +546,7 @@ static int ScaleARGBBilinearUp(int src_width,

  {
    int yi = y >> 16;
-    const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
+    const uint8_t* src = src_argb + yi * src_stride;

    // Allocate 2 rows of ARGB.
    const int row_size = (dst_width * 4 + 31) & ~31;
@ -555,7 +555,7 @@ static int ScaleARGBBilinearUp(int src_width,
      return 1;

    uint8_t* rowptr = row;
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
    int lasty = yi;

    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -573,7 +573,7 @@ static int ScaleARGBBilinearUp(int src_width,
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
-          src = src_argb + yi * (intptr_t)src_stride;
+          src = src_argb + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -599,275 +599,6 @@ static int ScaleARGBBilinearUp(int src_width,
  return 0;
 }

-#ifdef YUVSCALEUP
-// Scale YUV to ARGB up with bilinear interpolation.
-static int ScaleYUVToARGBBilinearUp(int src_width,
-                                    int src_height,
-                                    int dst_width,
-                                    int dst_height,
-                                    int src_stride_y,
-                                    int src_stride_u,
-                                    int src_stride_v,
-                                    int dst_stride_argb,
-                                    const uint8_t* src_y,
-                                    const uint8_t* src_u,
-                                    const uint8_t* src_v,
-                                    uint8_t* dst_argb,
-                                    int x,
-                                    int dx,
-                                    int y,
-                                    int dy,
-                                    enum FilterMode filtering) {
-  int j;
-  void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
-                        const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
-      I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(src_width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(src_width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX512BW)
-  if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
-      (kCpuHasAVX512BW | kCpuHasAVX512VL)) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
-    if (IS_ALIGNED(src_width, 32)) {
-      I422ToARGBRow = I422ToARGBRow_AVX512BW;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(src_width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_SVE2)
-  if (TestCpuFlag(kCpuHasSVE2)) {
-    I422ToARGBRow = I422ToARGBRow_SVE2;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_SME)
-  if (TestCpuFlag(kCpuHasSME)) {
-    I422ToARGBRow = I422ToARGBRow_SME;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_LSX)
-  if (TestCpuFlag(kCpuHasLSX)) {
-    I422ToARGBRow = I422ToARGBRow_Any_LSX;
-    if (IS_ALIGNED(src_width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_LSX;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_LASX)
-  if (TestCpuFlag(kCpuHasLASX)) {
-    I422ToARGBRow = I422ToARGBRow_Any_LASX;
-    if (IS_ALIGNED(src_width, 32)) {
-      I422ToARGBRow = I422ToARGBRow_LASX;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_RVV)
-  if (TestCpuFlag(kCpuHasRVV)) {
-    I422ToARGBRow = I422ToARGBRow_RVV;
-  }
-#endif
-
-  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
-                         ptrdiff_t src_stride, int dst_width,
-                         int source_y_fraction) = InterpolateRow_C;
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(dst_width, 8)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SME)
-  if (TestCpuFlag(kCpuHasSME)) {
-    InterpolateRow = InterpolateRow_SME;
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_LSX)
-  if (TestCpuFlag(kCpuHasLSX)) {
-    InterpolateRow = InterpolateRow_Any_LSX;
-    if (IS_ALIGNED(dst_width, 8)) {
-      InterpolateRow = InterpolateRow_LSX;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_RVV)
-  if (TestCpuFlag(kCpuHasRVV)) {
-    InterpolateRow = InterpolateRow_RVV;
-  }
-#endif
-
-  void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
-                              int dst_width, int x, int dx) =
-      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
-  if (src_width >= 32768) {
-    ScaleARGBFilterCols =
-        filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
-  }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
-  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
-  if (filtering && TestCpuFlag(kCpuHasNEON)) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
-    if (IS_ALIGNED(dst_width, 4)) {
-      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_LSX)
-  if (filtering && TestCpuFlag(kCpuHasLSX)) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX;
-    if (IS_ALIGNED(dst_width, 8)) {
-      ScaleARGBFilterCols = ScaleARGBFilterCols_LSX;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_RVV)
-  if (filtering && TestCpuFlag(kCpuHasRVV)) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_RVV;
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
-  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
-  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
-    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
-    if (IS_ALIGNED(dst_width, 8)) {
-      ScaleARGBFilterCols = ScaleARGBCols_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_LSX)
-  if (!filtering && TestCpuFlag(kCpuHasLSX)) {
-    ScaleARGBFilterCols = ScaleARGBCols_Any_LSX;
-    if (IS_ALIGNED(dst_width, 4)) {
-      ScaleARGBFilterCols = ScaleARGBCols_LSX;
-    }
-  }
-#endif
-  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
-    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
-      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
-    }
-#endif
-  }
-
-  const int max_y = (src_height - 1) << 16;
-  if (y > max_y) {
-    y = max_y;
-  }
-  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
-  int yi = y >> 16;
-  int uv_yi = yi >> kYShift;
-  const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
-  const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
-  const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
-
-  // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB
-  // scaled horizontally to the destination width.
-  const int row_size = (dst_width * 4 + 31) & ~31;
-  align_buffer_64(row, row_size * 2 + src_width * 4);
-
-  uint8_t* argb_row = row + row_size * 2;
-  uint8_t* rowptr = row;
-  int rowstride = row_size;
-  int lasty = yi;
-  if (!row)
-    return 1;
-
-  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
-  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
-  if (src_height > 1) {
-    src_row_y += src_stride_y;
-    if (yi & 1) {
-      src_row_u += src_stride_u;
-      src_row_v += src_stride_v;
-    }
-  }
-  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
-  if (src_height > 2) {
-    src_row_y += src_stride_y;
-    if (!(yi & 1)) {
-      src_row_u += src_stride_u;
-      src_row_v += src_stride_v;
-    }
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    yi = y >> 16;
-    if (yi != lasty) {
-      if (y > max_y) {
-        y = max_y;
-        yi = y >> 16;
-        uv_yi = yi >> kYShift;
-        src_row_y = src_y + yi * (intptr_t)src_stride_y;
-        src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
-        src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
-      }
-      if (yi != lasty) {
-        // TODO(fbarchard): Convert the clipped region of row.
-        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
-        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
-        rowptr += rowstride;
-        rowstride = -rowstride;
-        lasty = yi;
-        src_row_y += src_stride_y;
-        if (yi & 1) {
-          src_row_u += src_stride_u;
-          src_row_v += src_stride_v;
-        }
-      }
-    }
-    if (filtering == kFilterLinear) {
-      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
-    } else {
-      int yf = (y >> 8) & 255;
-      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
-    }
-    dst_argb += dst_stride_argb;
-    y += dy;
-  }
-  free_aligned_buffer_64(row);
-  return 0;
-}
-#endif
-
 // Scale ARGB to/from any dimensions, without interpolation.
 // Fixed point math is used for performance: The upper 16 bits
 // of x and dx is the integer part of the source position and
@ -877,8 +608,8 @@ static void ScaleARGBSimple(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                            const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            int x,
@ -921,8 +652,8 @@ static void ScaleARGBSimple(int src_width,
  }

  for (j = 0; j < dst_height; ++j) {
-    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
-                  dst_width, x, dx);
+    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
+                  dx);
    dst_argb += dst_stride;
    y += dy;
  }
@ -957,7 +688,7 @@ static int ScaleARGB(const uint8_t* src,
  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
-    src = src + (src_height - 1) * (intptr_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -972,8 +703,8 @@ static int ScaleARGB(const uint8_t* src,
  if (clip_y) {
    int64_t clipf = (int64_t)(clip_y)*dy;
    y += (clipf & 0xffff);
-    src += (clipf >> 16) * (intptr_t)src_stride;
-    dst += clip_y * dst_stride;
+    src += (clipf >> 16) * (ptrdiff_t)src_stride;
+    dst += clip_y * (ptrdiff_t)dst_stride;
  }

  // Special case for integer step values.
@ -1006,7 +737,7 @@ static int ScaleARGB(const uint8_t* src,
        filtering = kFilterNone;
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
-          ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
+          ARGBCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 4,
                   src_stride, dst, dst_stride, clip_width, clip_height);
          return 0;
        }
--- a/source/scale_uv.cc
+++ b/source/scale_uv.cc
@ -11,6 +11,7 @@
 #include "libyuv/scale_uv.h"

 #include <assert.h>
+#include <limits.h>
 #include <string.h>

 #include "libyuv/cpu_id.h"
@ -59,8 +60,8 @@ static void ScaleUVDown2(int src_width,
                         int src_height,
                         int dst_width,
                         int dst_height,
-                         int src_stride,
-                         int dst_stride,
+                         ptrdiff_t src_stride,
+                         ptrdiff_t dst_stride,
                         const uint8_t* src_uv,
                         uint8_t* dst_uv,
                         int x,
@ -69,7 +70,7 @@ static void ScaleUVDown2(int src_width,
                         int dy,
                         enum FilterMode filtering) {
  int j;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      filtering == kFilterNone
@ -83,9 +84,9 @@ static void ScaleUVDown2(int src_width,
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
-    src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
+    src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  } else {
-    src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
+    src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
  }

 #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@ -174,8 +175,8 @@ static int ScaleUVDown4Box(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
-                           int src_stride,
-                           int dst_stride,
+                           ptrdiff_t src_stride,
+                           ptrdiff_t dst_stride,
                           const uint8_t* src_uv,
                           uint8_t* dst_uv,
                           int x,
@ -188,12 +189,12 @@ static int ScaleUVDown4Box(int src_width,
  align_buffer_64(row, row_size * 2);
  if (!row)
    return 1;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      ScaleUVRowDown2Box_C;
  // Advance to odd row, even column.
-  src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  (void)src_width;
  (void)src_height;
  (void)dx;
@ -256,8 +257,8 @@ static void ScaleUVDownEven(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
@ -267,7 +268,7 @@ static void ScaleUVDownEven(int src_width,
                            enum FilterMode filtering) {
  int j;
  int col_step = dx >> 16;
-  ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
+  ptrdiff_t row_stride = (dy >> 16) * src_stride;
  void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                             int src_step, uint8_t* dst_uv, int dst_width) =
      filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@ -275,7 +276,7 @@ static void ScaleUVDownEven(int src_width,
  (void)src_height;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
-  src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
 #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@ -334,8 +335,8 @@ static int ScaleUVBilinearDown(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                               const uint8_t* src_uv,
                               uint8_t* dst_uv,
                               int x,
@ -422,7 +423,7 @@ static int ScaleUVBilinearDown(int src_width,
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
-      const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
+      const uint8_t* src = src_uv + yi * src_stride;
      if (filtering == kFilterLinear) {
        ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
      } else {
@ -448,8 +449,8 @@ static int ScaleUVBilinearUp(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                             const uint8_t* src_uv,
                             uint8_t* dst_uv,
                             int x,
@ -543,7 +544,7 @@ static int ScaleUVBilinearUp(int src_width,

  {
    int yi = y >> 16;
-    const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
+    const uint8_t* src = src_uv + yi * src_stride;

    // Allocate 2 rows of UV.
    const int row_size = (dst_width * 2 + 15) & ~15;
@ -552,7 +553,7 @@ static int ScaleUVBilinearUp(int src_width,
      return 1;

    uint8_t* rowptr = row;
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
    int lasty = yi;

    ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -570,7 +571,7 @@ static int ScaleUVBilinearUp(int src_width,
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
-          src = src_uv + yi * (intptr_t)src_stride;
+          src = src_uv + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -606,8 +607,8 @@ static void ScaleUVLinearUp2(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                             const uint8_t* src_uv,
                             uint8_t* dst_uv) {
  void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
@ -645,13 +646,12 @@ static void ScaleUVLinearUp2(int src_width,
 #endif

  if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
-               dst_width);
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
  } else {
    dy = FixedDiv(src_height - 1, dst_height - 1);
    y = (1 << 15) - 1;
    for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
      dst_uv += dst_stride;
      y += dy;
    }
@ -727,8 +727,8 @@ static void ScaleUVLinearUp2_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                const uint16_t* src_uv,
                                uint16_t* dst_uv) {
  void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
@ -760,13 +760,12 @@ static void ScaleUVLinearUp2_16(int src_width,
 #endif

  if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
-               dst_width);
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
  } else {
    dy = FixedDiv(src_height - 1, dst_height - 1);
    y = (1 << 15) - 1;
    for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
      dst_uv += dst_stride;
      y += dy;
    }
@ -836,8 +835,8 @@ static void ScaleUVSimple(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
-                          int src_stride,
-                          int dst_stride,
+                          ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride,
                          const uint8_t* src_uv,
                          uint8_t* dst_uv,
                          int x,
@ -872,8 +871,7 @@ static void ScaleUVSimple(int src_width,
  }

  for (j = 0; j < dst_height; ++j) {
-    ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
-                dx);
+    ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
    dst_uv += dst_stride;
    y += dy;
  }
@ -893,7 +891,7 @@ static int UVCopy(const uint8_t* src_uv,
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
-    src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
+    src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }

@ -913,7 +911,7 @@ static int UVCopy_16(const uint16_t* src_uv,
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
-    src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
+    src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }

@ -951,7 +949,7 @@ static int ScaleUV(const uint8_t* src,
  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
-    src = src + (src_height - 1) * (intptr_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -966,8 +964,8 @@ static int ScaleUV(const uint8_t* src,
  if (clip_y) {
    int64_t clipf = (int64_t)(clip_y)*dy;
    y += (clipf & 0xffff);
-    src += (clipf >> 16) * (intptr_t)src_stride;
-    dst += clip_y * dst_stride;
+    src += (clipf >> 16) * (ptrdiff_t)src_stride;
+    dst += clip_y * (ptrdiff_t)dst_stride;
  }

  // Special case for integer step values.
@ -1007,7 +1005,7 @@ static int ScaleUV(const uint8_t* src,
 #ifdef HAS_UVCOPY
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
-          UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
+          UVCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 2,
                 src_stride, dst, dst_stride, clip_width, clip_height);
          return 0;
        }
@ -1100,7 +1098,7 @@ int UVScale_16(const uint16_t* src_uv,
  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
-    src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
+    src_uv = src_uv + (src_height - 1) * (ptrdiff_t)src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
  src_width = Abs(src_width);
@ -1108,13 +1106,16 @@ int UVScale_16(const uint16_t* src_uv,
 #ifdef HAS_UVCOPY
  if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
    if (dst_height == 1) {
-      UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
+      UVCopy_16(src_uv + ((src_height - 1) / 2) * (ptrdiff_t)src_stride_uv,
                src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
    } else {
      dy = src_height / dst_height;
-      UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
-                (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
-                dst_width, dst_height);
+      if (src_stride_uv > INT_MAX / dy) {
+        return -1;
+      }
+      UVCopy_16(src_uv + ((dy - 1) / 2) * (ptrdiff_t)src_stride_uv,
+                dy * src_stride_uv, dst_uv, dst_stride_uv, dst_width,
+                dst_height);
    }

    return 0;
--- a/unit_test/scale_plane_test.cc
+++ b/unit_test/scale_plane_test.cc
@ -42,6 +42,108 @@

 namespace libyuv {

+// POC: int row_stride = src_stride * 2 overflows to a small negative value
+// when src_stride is close to INT_MAX, causing src_ptr to walk backward
+// past the start of the source allocation on the second loop iteration.
+// With src_stride = 0x7FFFFFFE, row_stride = (int)0xFFFFFFFC = -4, so on
+// y=1 ScaleRowDown2Box reads 4 bytes before the heap allocation.
+TEST_F(LibYUVScaleTest, ScalePlaneDown2_RowStrideOverflow) {
+  constexpr int kSrcStride = 0x7FFFFFFE;  // INT_MAX - 1
+  constexpr int kSrcW = 64;
+  constexpr int kSrcH = 4;
+  constexpr int kDstW = 32;
+  constexpr int kDstH = 2;
+  // src_size = (kSrcH - 1) * kSrcStride + kSrcW, with size_t overflow checks.
+  size_t src_size = kSrcH - 1;
+  if (src_size > SIZE_MAX / kSrcStride) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size *= kSrcStride;
+  if (src_size > SIZE_MAX - kSrcW) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size += kSrcW;
+
+#if defined(__aarch64__)
+  // Infer that malloc accepts a large size on CPUs with dot product (a76/a55).
+  int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd);
+#else
+  int has_large_malloc = 1;
+#endif
+  if (!has_large_malloc) {
+    GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes";
+  }
+
+  uint8_t* src = new (std::nothrow) uint8_t[src_size];
+  if (!src) {
+    GTEST_SKIP() << "could not allocate " << src_size << " bytes";
+  }
+  uint8_t dst[kDstW * kDstH];
+  uint8_t* src_row = src;
+  for (int i = 0; i < kSrcH; i++) {
+    memset(src_row, 0x41, kSrcW);
+    src_row += kSrcStride;
+  }
+  // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not
+  // instrument, so they silently read OOB without a report.
+  MaskCpuFlags(1);
+  // 2*dst == src on both axes -> ScalePlane dispatches to ScalePlaneDown2.
+  // int row_stride = kSrcStride * 2 wraps to -4; on y=1 src_ptr underflows.
+  ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH,
+             kFilterBox);
+  MaskCpuFlags(0);
+  delete[] src;
+}
+
+// POC: same defect in the 1/4 fast path. src_stride = 0x3FFFFFFF gives
+// int row_stride = src_stride * 4 = (int)0xFFFFFFFC = -4.
+TEST_F(LibYUVScaleTest, ScalePlaneDown4_RowStrideOverflow) {
+  constexpr int kSrcStride = 0x3FFFFFFF;  // INT_MAX / 2 (rounded down)
+  constexpr int kSrcW = 64;
+  constexpr int kSrcH = 8;
+  constexpr int kDstW = 16;
+  constexpr int kDstH = 2;
+  // src_size = (kSrcH - 1) * kSrcStride + kSrcW, with size_t overflow checks.
+  size_t src_size = kSrcH - 1;
+  if (src_size > SIZE_MAX / kSrcStride) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size *= kSrcStride;
+  if (src_size > SIZE_MAX - kSrcW) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size += kSrcW;
+
+#if defined(__aarch64__)
+  // Infer that malloc accepts a large size on CPUs with dot product (a76/a55).
+  int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd);
+#else
+  int has_large_malloc = 1;
+#endif
+  if (!has_large_malloc) {
+    GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes";
+  }
+
+  uint8_t* src = new (std::nothrow) uint8_t[src_size];
+  if (!src) {
+    GTEST_SKIP() << "could not allocate " << src_size << " bytes";
+  }
+  uint8_t dst[kDstW * kDstH];
+  uint8_t* src_row = src;
+  for (int i = 0; i < kSrcH; i++) {
+    memset(src_row, 0x41, kSrcW);
+    src_row += kSrcStride;
+  }
+  // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not
+  // instrument, so they silently read OOB without a report.
+  MaskCpuFlags(1);
+  // 4*dst == src on both axes with kFilterBox -> ScalePlaneDown4.
+  ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH,
+             kFilterBox);
+  MaskCpuFlags(0);
+  delete[] src;
+}
+
 #ifdef ENABLE_ROW_TESTS
 #ifdef HAS_SCALEROWDOWN2_SSSE3
 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {