From ebe6fef90344936e2cd623f727024482fca72a87 Mon Sep 17 00:00:00 2001
From: Wan-Teh Chang <wtc@google.com>
Date: Thu, 28 May 2026 13:56:44 -0700
Subject: [PATCH] Fix integer overflow in multiplications of stride

Audit all occurrences of "stride *" in the libyuv source tree. Ensure
that these multiplications are performed in the ptrdiff_t type.

For functions not declared in a public header (such as static
functions), prefer to declare the stride parameters (typically named
src_stride and dst_stride) and related stride local variables as
ptrdiff_t. If this is not possible, add ptrdiff_t casts to the stride
parameters in multiplications. If intptr_t or int64_t casts were used,
change them to ptrdiff_t casts, or drop them where the stride operand is
now declared as ptrdiff_t.

Bug: chromium:516986556
Change-Id: I6cd8a8eb00cbb5380db828bf83e4d89ff95891f3
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7882967
Commit-Queue: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Frank Barchard <fbarchard@google.com>
---
 source/convert_from.cc        |  30 ++--
 source/planar_functions.cc    |   2 +-
 source/rotate.cc              |  18 +-
 source/rotate_common.cc       |  10 +-
 source/rotate_neon.cc         |  20 +--
 source/rotate_neon64.cc       |  20 +--
 source/row_neon64.cc          |   8 +-
 source/scale.cc               | 150 ++++++++--------
 source/scale_argb.cc          | 327 +++-------------------------------
 source/scale_uv.cc            |  93 +++++-----
 unit_test/scale_plane_test.cc | 102 +++++++++++
 11 files changed, 308 insertions(+), 472 deletions(-)

diff --git a/source/convert_from.cc b/source/convert_from.cc
index 5cf88fa2d..363edc252 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -10,6 +10,8 @@
 
 #include "libyuv/convert_from.h"
 
+#include <stddef.h>
+
 #include "libyuv/basic_types.h"
 #include "libyuv/convert.h"  // For I420Copy
 #include "libyuv/cpu_id.h"
@@ -782,7 +784,7 @@ int ConvertFromI420(const uint8_t* y,
       break;
     case FOURCC_NV12: {
       int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
-      uint8_t* dst_uv = dst_sample + dst_y_stride * height;
+      uint8_t* dst_uv = dst_sample + (ptrdiff_t)dst_y_stride * height;
       r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                      dst_sample_stride ? dst_sample_stride : width, dst_uv,
                      dst_sample_stride ? dst_sample_stride : width, width,
@@ -791,7 +793,7 @@ int ConvertFromI420(const uint8_t* y,
     }
     case FOURCC_NV21: {
       int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
-      uint8_t* dst_vu = dst_sample + dst_y_stride * height;
+      uint8_t* dst_vu = dst_sample + (ptrdiff_t)dst_y_stride * height;
       r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                      dst_sample_stride ? dst_sample_stride : width, dst_vu,
                      dst_sample_stride ? dst_sample_stride : width, width,
@@ -807,11 +809,11 @@ int ConvertFromI420(const uint8_t* y,
       uint8_t* dst_u;
       uint8_t* dst_v;
       if (format == FOURCC_YV12) {
-        dst_v = dst_sample + dst_sample_stride * height;
-        dst_u = dst_v + halfstride * halfheight;
+        dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_u = dst_v + (ptrdiff_t)halfstride * halfheight;
       } else {
-        dst_u = dst_sample + dst_sample_stride * height;
-        dst_v = dst_u + halfstride * halfheight;
+        dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_v = dst_u + (ptrdiff_t)halfstride * halfheight;
       }
       r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                    dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
@@ -825,11 +827,11 @@ int ConvertFromI420(const uint8_t* y,
       uint8_t* dst_u;
       uint8_t* dst_v;
       if (format == FOURCC_YV16) {
-        dst_v = dst_sample + dst_sample_stride * height;
-        dst_u = dst_v + halfstride * height;
+        dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_u = dst_v + (ptrdiff_t)halfstride * height;
       } else {
-        dst_u = dst_sample + dst_sample_stride * height;
-        dst_v = dst_u + halfstride * height;
+        dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_v = dst_u + (ptrdiff_t)halfstride * height;
       }
       r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                      dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
@@ -842,11 +844,11 @@ int ConvertFromI420(const uint8_t* y,
       uint8_t* dst_u;
       uint8_t* dst_v;
       if (format == FOURCC_YV24) {
-        dst_v = dst_sample + dst_sample_stride * height;
-        dst_u = dst_v + dst_sample_stride * height;
+        dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_u = dst_v + (ptrdiff_t)dst_sample_stride * height;
       } else {
-        dst_u = dst_sample + dst_sample_stride * height;
-        dst_v = dst_u + dst_sample_stride * height;
+        dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
+        dst_v = dst_u + (ptrdiff_t)dst_sample_stride * height;
       }
       r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                      dst_sample_stride, dst_u, dst_sample_stride, dst_v,
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 149dde3a1..016ea24e0 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -3088,7 +3088,7 @@ int I420Blend(const uint8_t* src_y0,
     }
     // Subsample 2 rows of UV to half width and half height.
     ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
-    alpha += alpha_stride * 2;
+    alpha += (ptrdiff_t)alpha_stride * 2;
     BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
     BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
     src_u0 += src_stride_u0;
diff --git a/source/rotate.cc b/source/rotate.cc
index 520806236..d51b313be 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -128,7 +128,7 @@ void RotatePlane90(const uint8_t* src,
   // Rotate by 90 is a transpose with the source read
   // from bottom to top. So set the source pointer to the end
   // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
+  src += (ptrdiff_t)src_stride * (height - 1);
   src_stride = -src_stride;
   TransposePlane(src, src_stride, dst, dst_stride, width, height);
 }
@@ -143,7 +143,7 @@ void RotatePlane270(const uint8_t* src,
   // Rotate by 270 is a transpose with the destination written
   // from bottom to top. So set the destination pointer to the end
   // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
+  dst += (ptrdiff_t)dst_stride * (width - 1);
   dst_stride = -dst_stride;
   TransposePlane(src, src_stride, dst, dst_stride, width, height);
 }
@@ -160,8 +160,8 @@ void RotatePlane180(const uint8_t* src,
   assert(row);
   if (!row)
     return;
-  const uint8_t* src_bot = src + src_stride * (height - 1);
-  uint8_t* dst_bot = dst + dst_stride * (height - 1);
+  const uint8_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1);
+  uint8_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
   int y;
   void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
@@ -354,7 +354,7 @@ void SplitRotateUV90(const uint8_t* src,
                      int dst_stride_b,
                      int width,
                      int height) {
-  src += src_stride * (height - 1);
+  src += (ptrdiff_t)src_stride * (height - 1);
   src_stride = -src_stride;
 
   SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
@@ -533,7 +533,7 @@ static void RotatePlane90_16(const uint16_t* src,
   // Rotate by 90 is a transpose with the source read
   // from bottom to top. So set the source pointer to the end
   // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
+  src += (ptrdiff_t)src_stride * (height - 1);
   src_stride = -src_stride;
   TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
 }
@@ -547,7 +547,7 @@ static void RotatePlane270_16(const uint16_t* src,
   // Rotate by 270 is a transpose with the destination written
   // from bottom to top. So set the destination pointer to the end
   // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
+  dst += (ptrdiff_t)dst_stride * (width - 1);
   dst_stride = -dst_stride;
   TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
 }
@@ -558,8 +558,8 @@ static void RotatePlane180_16(const uint16_t* src,
                               int dst_stride,
                               int width,
                               int height) {
-  const uint16_t* src_bot = src + src_stride * (height - 1);
-  uint16_t* dst_bot = dst + dst_stride * (height - 1);
+  const uint16_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1);
+  uint16_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
   int y;
 
diff --git a/source/rotate_common.cc b/source/rotate_common.cc
index e0341fec4..ad420489c 100644
--- a/source/rotate_common.cc
+++ b/source/rotate_common.cc
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <stddef.h>
+
 #include "libyuv/rotate_row.h"
 #include "libyuv/row.h"
 
@@ -191,10 +193,10 @@ void Transpose4x4_32_C(const uint8_t* src,
     ((uint32_t*)(dst3))[1] = p31;
     ((uint32_t*)(dst3))[2] = p32;
     ((uint32_t*)(dst3))[3] = p33;
-    src += src_stride * 4;  // advance 4 rows
-    src1 += src_stride * 4;
-    src2 += src_stride * 4;
-    src3 += src_stride * 4;
+    src += (ptrdiff_t)src_stride * 4;  // advance 4 rows
+    src1 += (ptrdiff_t)src_stride * 4;
+    src2 += (ptrdiff_t)src_stride * 4;
+    src3 += (ptrdiff_t)src_stride * 4;
     dst += 4 * 4;  // advance 4 columns
     dst1 += 4 * 4;
     dst2 += 4 * 4;
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index 27bd2251b..de14c41b0 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -198,16 +198,16 @@ void Transpose4x4_32_NEON(const uint8_t* src,
       "vst1.8      {q3}, [%7]!                   \n"
       "bgt         1b                            \n"
 
-      : "+r"(src),                        // %0
-        "+r"(src1),                       // %1
-        "+r"(src2),                       // %2
-        "+r"(src3),                       // %3
-        "+r"(dst),                        // %4
-        "+r"(dst1),                       // %5
-        "+r"(dst2),                       // %6
-        "+r"(dst3),                       // %7
-        "+r"(width)                       // %8
-      : "r"((ptrdiff_t)(src_stride * 4))  // %9
+      : "+r"(src),                      // %0
+        "+r"(src1),                     // %1
+        "+r"(src2),                     // %2
+        "+r"(src3),                     // %3
+        "+r"(dst),                      // %4
+        "+r"(dst1),                     // %5
+        "+r"(dst2),                     // %6
+        "+r"(dst3),                     // %7
+        "+r"(width)                     // %8
+      : "r"((ptrdiff_t)src_stride * 4)  // %9
       : "memory", "cc", "q0", "q1", "q2", "q3");
 }
 
diff --git a/source/rotate_neon64.cc b/source/rotate_neon64.cc
index e09bcb178..14f31d94c 100644
--- a/source/rotate_neon64.cc
+++ b/source/rotate_neon64.cc
@@ -252,16 +252,16 @@ void Transpose4x4_32_NEON(const uint8_t* src,
       "st1         {v2.4s}, [%6], 16             \n"
       "st1         {v3.4s}, [%7], 16             \n"
       "b.gt        1b                            \n"
-      : "+r"(src),                        // %0
-        "+r"(src1),                       // %1
-        "+r"(src2),                       // %2
-        "+r"(src3),                       // %3
-        "+r"(dst),                        // %4
-        "+r"(dst1),                       // %5
-        "+r"(dst2),                       // %6
-        "+r"(dst3),                       // %7
-        "+r"(width)                       // %8
-      : "r"((ptrdiff_t)(src_stride * 4))  // %9
+      : "+r"(src),                      // %0
+        "+r"(src1),                     // %1
+        "+r"(src2),                     // %2
+        "+r"(src3),                     // %3
+        "+r"(dst),                      // %4
+        "+r"(dst1),                     // %5
+        "+r"(dst2),                     // %6
+        "+r"(dst3),                     // %7
+        "+r"(width)                     // %8
+      : "r"((ptrdiff_t)src_stride * 4)  // %9
       : "memory", "cc", "v0", "v1", "v2", "v3");
 }
 
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 4eed2df12..65d7b65a5 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -4751,10 +4751,10 @@ void ConvertFP16ToFP32Column_NEON(const uint16_t* src,  // fp16
       "str         s2, [%1], #4                  \n"  // store 1 floats
       "b.gt        2b                            \n"
       "3:          \n"
-      : "+r"(src),                        // %0
-        "+r"(dst),                        // %1
-        "+r"(width)                       // %2
-      : "r"((ptrdiff_t)(src_stride * 2))  // %3
+      : "+r"(src),                      // %0
+        "+r"(dst),                      // %1
+        "+r"(width)                     // %2
+      : "r"((ptrdiff_t)src_stride * 2)  // %3
       : "cc", "memory", "v0", "v1", "v2", "v3");
 }
 
diff --git a/source/scale.cc b/source/scale.cc
index 7e762a1f4..a78356328 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -39,8 +39,8 @@ static void ScalePlaneDown2(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
@@ -51,7 +51,7 @@ static void ScalePlaneDown2(int src_width,
           ? ScaleRowDown2_C
           : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
                                         : ScaleRowDown2Box_C);
-  int row_stride = src_stride * 2;
+  ptrdiff_t row_stride = src_stride * 2;
   (void)src_width;
   (void)src_height;
   if (!filtering) {
@@ -151,8 +151,8 @@ static void ScalePlaneDown2_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
@@ -163,7 +163,7 @@ static void ScalePlaneDown2_16(int src_width,
           ? ScaleRowDown2_16_C
           : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
                                         : ScaleRowDown2Box_16_C);
-  int row_stride = src_stride * 2;
+  ptrdiff_t row_stride = src_stride * 2;
   (void)src_width;
   (void)src_height;
   if (!filtering) {
@@ -228,7 +228,7 @@ void ScalePlaneDown2_16To8(int src_width,
                  ? ScaleRowDown2_16To8_C
                  : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
                                                : ScaleRowDown2Box_16To8_C));
-  int row_stride = src_stride * 2;
+  ptrdiff_t row_stride = (ptrdiff_t)src_stride * 2;
   (void)dst_height;
   if (!filtering) {
     src_ptr += src_stride;  // Point to odd rows.
@@ -259,8 +259,8 @@ static void ScalePlaneDown4(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
@@ -268,7 +268,7 @@ static void ScalePlaneDown4(int src_width,
   void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                         uint8_t* dst_ptr, int dst_width) =
       filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
-  int row_stride = src_stride * 4;
+  ptrdiff_t row_stride = src_stride * 4;
   (void)src_width;
   (void)src_height;
   if (!filtering) {
@@ -331,8 +331,8 @@ static void ScalePlaneDown4_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
@@ -340,7 +340,7 @@ static void ScalePlaneDown4_16(int src_width,
   void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                         uint16_t* dst_ptr, int dst_width) =
       filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
-  int row_stride = src_stride * 4;
+  ptrdiff_t row_stride = src_stride * 4;
   (void)src_width;
   (void)src_height;
   if (!filtering) {
@@ -375,8 +375,8 @@ static void ScalePlaneDown34(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                              const uint8_t* src_ptr,
                              uint8_t* dst_ptr,
                              enum FilterMode filtering) {
@@ -385,7 +385,7 @@ static void ScalePlaneDown34(int src_width,
                            uint8_t* dst_ptr, int dst_width);
   void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                            uint8_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
   (void)src_width;
   (void)src_height;
   assert(dst_width % 3 == 0);
@@ -502,8 +502,8 @@ static void ScalePlaneDown34_16(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                 const uint16_t* src_ptr,
                                 uint16_t* dst_ptr,
                                 enum FilterMode filtering) {
@@ -512,7 +512,7 @@ static void ScalePlaneDown34_16(int src_width,
                            uint16_t* dst_ptr, int dst_width);
   void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                            uint16_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
   (void)src_width;
   (void)src_height;
   assert(dst_width % 3 == 0);
@@ -588,8 +588,8 @@ static void ScalePlaneDown38(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                              const uint8_t* src_ptr,
                              uint8_t* dst_ptr,
                              enum FilterMode filtering) {
@@ -598,7 +598,7 @@ static void ScalePlaneDown38(int src_width,
                            uint8_t* dst_ptr, int dst_width);
   void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                            uint8_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
   assert(dst_width % 3 == 0);
   (void)src_width;
   (void)src_height;
@@ -708,8 +708,8 @@ static void ScalePlaneDown38_16(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                 const uint16_t* src_ptr,
                                 uint16_t* dst_ptr,
                                 enum FilterMode filtering) {
@@ -718,7 +718,7 @@ static void ScalePlaneDown38_16(int src_width,
                            uint16_t* dst_ptr, int dst_width);
   void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                            uint16_t* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
   (void)src_width;
   (void)src_height;
   assert(dst_width % 3 == 0);
@@ -901,8 +901,8 @@ static int ScalePlaneBox(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
-                         int src_stride,
-                         int dst_stride,
+                         ptrdiff_t src_stride,
+                         ptrdiff_t dst_stride,
                          const uint8_t* src_ptr,
                          uint8_t* dst_ptr) {
   int j, k;
@@ -967,7 +967,7 @@ static int ScalePlaneBox(int src_width,
     for (j = 0; j < dst_height; ++j) {
       int boxheight;
       int iy = y >> 16;
-      const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
+      const uint8_t* src = src_ptr + iy * src_stride;
       y += dy;
       if (y > max_y) {
         y = max_y;
@@ -990,8 +990,8 @@ static int ScalePlaneBox_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                             const uint16_t* src_ptr,
                             uint16_t* dst_ptr) {
   int j, k;
@@ -1024,7 +1024,7 @@ static int ScalePlaneBox_16(int src_width,
     for (j = 0; j < dst_height; ++j) {
       int boxheight;
       int iy = y >> 16;
-      const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
+      const uint16_t* src = src_ptr + iy * src_stride;
       y += dy;
       if (y > max_y) {
         y = max_y;
@@ -1048,8 +1048,8 @@ static int ScalePlaneBilinearDown(int src_width,
                                   int src_height,
                                   int dst_width,
                                   int dst_height,
-                                  int src_stride,
-                                  int dst_stride,
+                                  ptrdiff_t src_stride,
+                                  ptrdiff_t dst_stride,
                                   const uint8_t* src_ptr,
                                   uint8_t* dst_ptr,
                                   enum FilterMode filtering) {
@@ -1138,7 +1138,7 @@ static int ScalePlaneBilinearDown(int src_width,
 
   for (j = 0; j < dst_height; ++j) {
     int yi = y >> 16;
-    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint8_t* src = src_ptr + yi * src_stride;
     if (filtering == kFilterLinear) {
       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
     } else {
@@ -1160,8 +1160,8 @@ static int ScalePlaneBilinearDown_16(int src_width,
                                      int src_height,
                                      int dst_width,
                                      int dst_height,
-                                     int src_stride,
-                                     int dst_stride,
+                                     ptrdiff_t src_stride,
+                                     ptrdiff_t dst_stride,
                                      const uint16_t* src_ptr,
                                      uint16_t* dst_ptr,
                                      enum FilterMode filtering) {
@@ -1229,7 +1229,7 @@ static int ScalePlaneBilinearDown_16(int src_width,
 
   for (j = 0; j < dst_height; ++j) {
     int yi = y >> 16;
-    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint16_t* src = src_ptr + yi * src_stride;
     if (filtering == kFilterLinear) {
       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
     } else {
@@ -1252,8 +1252,8 @@ static int ScalePlaneBilinearUp(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                 const uint8_t* src_ptr,
                                 uint8_t* dst_ptr,
                                 enum FilterMode filtering) {
@@ -1339,7 +1339,7 @@ static int ScalePlaneBilinearUp(int src_width,
   }
   {
     int yi = y >> 16;
-    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint8_t* src = src_ptr + yi * src_stride;
 
     // Allocate 2 row buffers.
     const int row_size = (dst_width + 31) & ~31;
@@ -1348,7 +1348,7 @@ static int ScalePlaneBilinearUp(int src_width,
       return 1;
 
     uint8_t* rowptr = row;
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
     int lasty = yi;
 
     ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1366,7 +1366,7 @@ static int ScalePlaneBilinearUp(int src_width,
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
-          src = src_ptr + yi * (int64_t)src_stride;
+          src = src_ptr + yi * src_stride;
         }
         if (yi != lasty) {
           ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1401,8 +1401,8 @@ static void ScalePlaneUp2_Linear(int src_width,
                                  int src_height,
                                  int dst_width,
                                  int dst_height,
-                                 int src_stride,
-                                 int dst_stride,
+                                 ptrdiff_t src_stride,
+                                 ptrdiff_t dst_stride,
                                  const uint8_t* src_ptr,
                                  uint8_t* dst_ptr) {
   void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
@@ -1445,13 +1445,13 @@ static void ScalePlaneUp2_Linear(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
       dst_ptr += dst_stride;
       y += dy;
     }
@@ -1466,8 +1466,8 @@ static void ScalePlaneUp2_Bilinear(int src_width,
                                    int src_height,
                                    int dst_width,
                                    int dst_height,
-                                   int src_stride,
-                                   int dst_stride,
+                                   ptrdiff_t src_stride,
+                                   ptrdiff_t dst_stride,
                                    const uint8_t* src_ptr,
                                    uint8_t* dst_ptr) {
   void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
@@ -1532,8 +1532,8 @@ static void ScalePlaneUp2_12_Linear(int src_width,
                                     int src_height,
                                     int dst_width,
                                     int dst_height,
-                                    int src_stride,
-                                    int dst_stride,
+                                    ptrdiff_t src_stride,
+                                    ptrdiff_t dst_stride,
                                     const uint16_t* src_ptr,
                                     uint16_t* dst_ptr) {
   void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
@@ -1565,13 +1565,13 @@ static void ScalePlaneUp2_12_Linear(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
       dst_ptr += dst_stride;
       y += dy;
     }
@@ -1587,8 +1587,8 @@ static void ScalePlaneUp2_12_Bilinear(int src_width,
                                       int src_height,
                                       int dst_width,
                                       int dst_height,
-                                      int src_stride,
-                                      int dst_stride,
+                                      ptrdiff_t src_stride,
+                                      ptrdiff_t dst_stride,
                                       const uint16_t* src_ptr,
                                       uint16_t* dst_ptr) {
   void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
@@ -1635,8 +1635,8 @@ static void ScalePlaneUp2_16_Linear(int src_width,
                                     int src_height,
                                     int dst_width,
                                     int dst_height,
-                                    int src_stride,
-                                    int dst_stride,
+                                    ptrdiff_t src_stride,
+                                    ptrdiff_t dst_stride,
                                     const uint16_t* src_ptr,
                                     uint16_t* dst_ptr) {
   void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
@@ -1668,13 +1668,13 @@ static void ScalePlaneUp2_16_Linear(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
       dst_ptr += dst_stride;
       y += dy;
     }
@@ -1685,8 +1685,8 @@ static void ScalePlaneUp2_16_Bilinear(int src_width,
                                       int src_height,
                                       int dst_width,
                                       int dst_height,
-                                      int src_stride,
-                                      int dst_stride,
+                                      ptrdiff_t src_stride,
+                                      ptrdiff_t dst_stride,
                                       const uint16_t* src_ptr,
                                       uint16_t* dst_ptr) {
   void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
@@ -1733,8 +1733,8 @@ static int ScalePlaneBilinearUp_16(int src_width,
                                    int src_height,
                                    int dst_width,
                                    int dst_height,
-                                   int src_stride,
-                                   int dst_stride,
+                                   ptrdiff_t src_stride,
+                                   ptrdiff_t dst_stride,
                                    const uint16_t* src_ptr,
                                    uint16_t* dst_ptr,
                                    enum FilterMode filtering) {
@@ -1806,12 +1806,12 @@ static int ScalePlaneBilinearUp_16(int src_width,
   }
   {
     int yi = y >> 16;
-    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
+    const uint16_t* src = src_ptr + yi * src_stride;
 
     // Allocate 2 row buffers.
     const int row_size = (dst_width + 31) & ~31;
     align_buffer_64(row, row_size * 4);
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
     int lasty = yi;
     uint16_t* rowptr = (uint16_t*)row;
     if (!row)
@@ -1832,7 +1832,7 @@ static int ScalePlaneBilinearUp_16(int src_width,
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
-          src = src_ptr + yi * (int64_t)src_stride;
+          src = src_ptr + yi * src_stride;
         }
         if (yi != lasty) {
           ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1867,8 +1867,8 @@ static void ScalePlaneSimple(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                              const uint8_t* src_ptr,
                              uint8_t* dst_ptr) {
   int i;
@@ -1893,8 +1893,7 @@ static void ScalePlaneSimple(int src_width,
   }
 
   for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
-              dx);
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
     dst_ptr += dst_stride;
     y += dy;
   }
@@ -1904,8 +1903,8 @@ static void ScalePlaneSimple_16(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                 const uint16_t* src_ptr,
                                 uint16_t* dst_ptr) {
   int i;
@@ -1930,8 +1929,7 @@ static void ScalePlaneSimple_16(int src_width,
   }
 
   for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
-              dx);
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
     dst_ptr += dst_stride;
     y += dy;
   }
@@ -1956,7 +1954,7 @@ int ScalePlane(const uint8_t* src,
   // Negative height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
     src_stride = -src_stride;
   }
   // Use specialized scales to improve performance for common resolutions.
@@ -2056,7 +2054,7 @@ int ScalePlane_16(const uint16_t* src,
   // Negative height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
     src_stride = -src_stride;
   }
   // Use specialized scales to improve performance for common resolutions.
@@ -2160,7 +2158,7 @@ int ScalePlane_12(const uint16_t* src,
   // Negative height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (int64_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
     src_stride = -src_stride;
   }
 
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index eb6caa538..760293d0d 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -38,8 +38,8 @@ static void ScaleARGBDown2(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                           int src_stride,
-                           int dst_stride,
+                           ptrdiff_t src_stride,
+                           ptrdiff_t dst_stride,
                            const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            int x,
@@ -48,7 +48,7 @@ static void ScaleARGBDown2(int src_width,
                            int dy,
                            enum FilterMode filtering) {
   int j;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
   void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                             uint8_t* dst_argb, int dst_width) =
       filtering == kFilterNone
@@ -62,9 +62,9 @@ static void ScaleARGBDown2(int src_width,
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
   // Advance to odd row, even column.
   if (filtering == kFilterBilinear) {
-    src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
+    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
   } else {
-    src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
+    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
   }
 
 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@@ -152,8 +152,8 @@ static int ScaleARGBDown4Box(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                              const uint8_t* src_argb,
                              uint8_t* dst_argb,
                              int x,
@@ -169,12 +169,12 @@ static int ScaleARGBDown4Box(int src_width,
   align_buffer_64(row, row_size * 2);
   if (!row)
     return 1;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
   void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                             uint8_t* dst_argb, int dst_width) =
       ScaleARGBRowDown2Box_C;
   // Advance to odd row, even column.
-  src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
   (void)src_width;
   (void)src_height;
   (void)dx;
@@ -226,8 +226,8 @@ static void ScaleARGBDownEven(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
-                              int src_stride,
-                              int dst_stride,
+                              ptrdiff_t src_stride,
+                              ptrdiff_t dst_stride,
                               const uint8_t* src_argb,
                               uint8_t* dst_argb,
                               int x,
@@ -237,7 +237,7 @@ static void ScaleARGBDownEven(int src_width,
                               enum FilterMode filtering) {
   int j;
   int col_step = dx >> 16;
-  ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
+  ptrdiff_t row_stride = (dy >> 16) * src_stride;
   void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
                                int src_step, uint8_t* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@@ -245,7 +245,7 @@ static void ScaleARGBDownEven(int src_width,
   (void)src_height;
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
-  src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@@ -302,8 +302,8 @@ static int ScaleARGBBilinearDown(int src_width,
                                  int src_height,
                                  int dst_width,
                                  int dst_height,
-                                 int src_stride,
-                                 int dst_stride,
+                                 ptrdiff_t src_stride,
+                                 ptrdiff_t dst_stride,
                                  const uint8_t* src_argb,
                                  uint8_t* dst_argb,
                                  int x,
@@ -405,7 +405,7 @@ static int ScaleARGBBilinearDown(int src_width,
     }
     for (j = 0; j < dst_height; ++j) {
       int yi = y >> 16;
-      const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
+      const uint8_t* src = src_argb + yi * src_stride;
       if (filtering == kFilterLinear) {
         ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
       } else {
@@ -429,8 +429,8 @@ static int ScaleARGBBilinearUp(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                                const uint8_t* src_argb,
                                uint8_t* dst_argb,
                                int x,
@@ -546,7 +546,7 @@ static int ScaleARGBBilinearUp(int src_width,
 
   {
     int yi = y >> 16;
-    const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
+    const uint8_t* src = src_argb + yi * src_stride;
 
     // Allocate 2 rows of ARGB.
     const int row_size = (dst_width * 4 + 31) & ~31;
@@ -555,7 +555,7 @@ static int ScaleARGBBilinearUp(int src_width,
       return 1;
 
     uint8_t* rowptr = row;
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
     int lasty = yi;
 
     ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -573,7 +573,7 @@ static int ScaleARGBBilinearUp(int src_width,
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
-          src = src_argb + yi * (intptr_t)src_stride;
+          src = src_argb + yi * src_stride;
         }
         if (yi != lasty) {
           ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -599,275 +599,6 @@ static int ScaleARGBBilinearUp(int src_width,
   return 0;
 }
 
-#ifdef YUVSCALEUP
-// Scale YUV to ARGB up with bilinear interpolation.
-static int ScaleYUVToARGBBilinearUp(int src_width,
-                                    int src_height,
-                                    int dst_width,
-                                    int dst_height,
-                                    int src_stride_y,
-                                    int src_stride_u,
-                                    int src_stride_v,
-                                    int dst_stride_argb,
-                                    const uint8_t* src_y,
-                                    const uint8_t* src_u,
-                                    const uint8_t* src_v,
-                                    uint8_t* dst_argb,
-                                    int x,
-                                    int dx,
-                                    int y,
-                                    int dy,
-                                    enum FilterMode filtering) {
-  int j;
-  void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
-                        const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
-      I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(src_width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(src_width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX512BW)
-  if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
-      (kCpuHasAVX512BW | kCpuHasAVX512VL)) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
-    if (IS_ALIGNED(src_width, 32)) {
-      I422ToARGBRow = I422ToARGBRow_AVX512BW;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(src_width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_SVE2)
-  if (TestCpuFlag(kCpuHasSVE2)) {
-    I422ToARGBRow = I422ToARGBRow_SVE2;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_SME)
-  if (TestCpuFlag(kCpuHasSME)) {
-    I422ToARGBRow = I422ToARGBRow_SME;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_LSX)
-  if (TestCpuFlag(kCpuHasLSX)) {
-    I422ToARGBRow = I422ToARGBRow_Any_LSX;
-    if (IS_ALIGNED(src_width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_LSX;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_LASX)
-  if (TestCpuFlag(kCpuHasLASX)) {
-    I422ToARGBRow = I422ToARGBRow_Any_LASX;
-    if (IS_ALIGNED(src_width, 32)) {
-      I422ToARGBRow = I422ToARGBRow_LASX;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_RVV)
-  if (TestCpuFlag(kCpuHasRVV)) {
-    I422ToARGBRow = I422ToARGBRow_RVV;
-  }
-#endif
-
-  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
-                         ptrdiff_t src_stride, int dst_width,
-                         int source_y_fraction) = InterpolateRow_C;
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(dst_width, 8)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SME)
-  if (TestCpuFlag(kCpuHasSME)) {
-    InterpolateRow = InterpolateRow_SME;
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_LSX)
-  if (TestCpuFlag(kCpuHasLSX)) {
-    InterpolateRow = InterpolateRow_Any_LSX;
-    if (IS_ALIGNED(dst_width, 8)) {
-      InterpolateRow = InterpolateRow_LSX;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_RVV)
-  if (TestCpuFlag(kCpuHasRVV)) {
-    InterpolateRow = InterpolateRow_RVV;
-  }
-#endif
-
-  void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
-                              int dst_width, int x, int dx) =
-      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
-  if (src_width >= 32768) {
-    ScaleARGBFilterCols =
-        filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
-  }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
-  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
-  if (filtering && TestCpuFlag(kCpuHasNEON)) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
-    if (IS_ALIGNED(dst_width, 4)) {
-      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_LSX)
-  if (filtering && TestCpuFlag(kCpuHasLSX)) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX;
-    if (IS_ALIGNED(dst_width, 8)) {
-      ScaleARGBFilterCols = ScaleARGBFilterCols_LSX;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_RVV)
-  if (filtering && TestCpuFlag(kCpuHasRVV)) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_RVV;
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
-  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
-  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
-    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
-    if (IS_ALIGNED(dst_width, 8)) {
-      ScaleARGBFilterCols = ScaleARGBCols_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_LSX)
-  if (!filtering && TestCpuFlag(kCpuHasLSX)) {
-    ScaleARGBFilterCols = ScaleARGBCols_Any_LSX;
-    if (IS_ALIGNED(dst_width, 4)) {
-      ScaleARGBFilterCols = ScaleARGBCols_LSX;
-    }
-  }
-#endif
-  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
-    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
-      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
-    }
-#endif
-  }
-
-  const int max_y = (src_height - 1) << 16;
-  if (y > max_y) {
-    y = max_y;
-  }
-  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
-  int yi = y >> 16;
-  int uv_yi = yi >> kYShift;
-  const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
-  const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
-  const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
-
-  // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB
-  // scaled horizontally to the destination width.
-  const int row_size = (dst_width * 4 + 31) & ~31;
-  align_buffer_64(row, row_size * 2 + src_width * 4);
-
-  uint8_t* argb_row = row + row_size * 2;
-  uint8_t* rowptr = row;
-  int rowstride = row_size;
-  int lasty = yi;
-  if (!row)
-    return 1;
-
-  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
-  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
-  if (src_height > 1) {
-    src_row_y += src_stride_y;
-    if (yi & 1) {
-      src_row_u += src_stride_u;
-      src_row_v += src_stride_v;
-    }
-  }
-  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
-  if (src_height > 2) {
-    src_row_y += src_stride_y;
-    if (!(yi & 1)) {
-      src_row_u += src_stride_u;
-      src_row_v += src_stride_v;
-    }
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    yi = y >> 16;
-    if (yi != lasty) {
-      if (y > max_y) {
-        y = max_y;
-        yi = y >> 16;
-        uv_yi = yi >> kYShift;
-        src_row_y = src_y + yi * (intptr_t)src_stride_y;
-        src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
-        src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
-      }
-      if (yi != lasty) {
-        // TODO(fbarchard): Convert the clipped region of row.
-        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
-        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
-        rowptr += rowstride;
-        rowstride = -rowstride;
-        lasty = yi;
-        src_row_y += src_stride_y;
-        if (yi & 1) {
-          src_row_u += src_stride_u;
-          src_row_v += src_stride_v;
-        }
-      }
-    }
-    if (filtering == kFilterLinear) {
-      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
-    } else {
-      int yf = (y >> 8) & 255;
-      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
-    }
-    dst_argb += dst_stride_argb;
-    y += dy;
-  }
-  free_aligned_buffer_64(row);
-  return 0;
-}
-#endif
-
 // Scale ARGB to/from any dimensions, without interpolation.
 // Fixed point math is used for performance: The upper 16 bits
 // of x and dx is the integer part of the source position and
@@ -877,8 +608,8 @@ static void ScaleARGBSimple(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                             const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int x,
@@ -921,8 +652,8 @@ static void ScaleARGBSimple(int src_width,
   }
 
   for (j = 0; j < dst_height; ++j) {
-    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
-                  dst_width, x, dx);
+    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
+                  dx);
     dst_argb += dst_stride;
     y += dy;
   }
@@ -957,7 +688,7 @@ static int ScaleARGB(const uint8_t* src,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (intptr_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
     src_stride = -src_stride;
   }
   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -972,8 +703,8 @@ static int ScaleARGB(const uint8_t* src,
   if (clip_y) {
     int64_t clipf = (int64_t)(clip_y)*dy;
     y += (clipf & 0xffff);
-    src += (clipf >> 16) * (intptr_t)src_stride;
-    dst += clip_y * dst_stride;
+    src += (clipf >> 16) * (ptrdiff_t)src_stride;
+    dst += clip_y * (ptrdiff_t)dst_stride;
   }
 
   // Special case for integer step values.
@@ -1006,7 +737,7 @@ static int ScaleARGB(const uint8_t* src,
         filtering = kFilterNone;
         if (dx == 0x10000 && dy == 0x10000) {
           // Straight copy.
-          ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
+          ARGBCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 4,
                    src_stride, dst, dst_stride, clip_width, clip_height);
           return 0;
         }
diff --git a/source/scale_uv.cc b/source/scale_uv.cc
index 35ad55dda..95ac5307f 100644
--- a/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -11,6 +11,7 @@
 #include "libyuv/scale_uv.h"
 
 #include <assert.h>
+#include <limits.h>
 #include <string.h>
 
 #include "libyuv/cpu_id.h"
@@ -59,8 +60,8 @@ static void ScaleUVDown2(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
-                         int src_stride,
-                         int dst_stride,
+                         ptrdiff_t src_stride,
+                         ptrdiff_t dst_stride,
                          const uint8_t* src_uv,
                          uint8_t* dst_uv,
                          int x,
@@ -69,7 +70,7 @@ static void ScaleUVDown2(int src_width,
                          int dy,
                          enum FilterMode filtering) {
   int j;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
   void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                           uint8_t* dst_uv, int dst_width) =
       filtering == kFilterNone
@@ -83,9 +84,9 @@ static void ScaleUVDown2(int src_width,
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
   // Advance to odd row, even column.
   if (filtering == kFilterBilinear) {
-    src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
+    src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
   } else {
-    src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
+    src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
   }
 
 #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@@ -174,8 +175,8 @@ static int ScaleUVDown4Box(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
-                           int src_stride,
-                           int dst_stride,
+                           ptrdiff_t src_stride,
+                           ptrdiff_t dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
@@ -188,12 +189,12 @@ static int ScaleUVDown4Box(int src_width,
   align_buffer_64(row, row_size * 2);
   if (!row)
     return 1;
-  int row_stride = src_stride * (dy >> 16);
+  ptrdiff_t row_stride = src_stride * (dy >> 16);
   void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                           uint8_t* dst_uv, int dst_width) =
       ScaleUVRowDown2Box_C;
   // Advance to odd row, even column.
-  src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
   (void)src_width;
   (void)src_height;
   (void)dx;
@@ -256,8 +257,8 @@ static void ScaleUVDownEven(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
-                            int src_stride,
-                            int dst_stride,
+                            ptrdiff_t src_stride,
+                            ptrdiff_t dst_stride,
                             const uint8_t* src_uv,
                             uint8_t* dst_uv,
                             int x,
@@ -267,7 +268,7 @@ static void ScaleUVDownEven(int src_width,
                             enum FilterMode filtering) {
   int j;
   int col_step = dx >> 16;
-  ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
+  ptrdiff_t row_stride = (dy >> 16) * src_stride;
   void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                              int src_step, uint8_t* dst_uv, int dst_width) =
       filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@@ -275,7 +276,7 @@ static void ScaleUVDownEven(int src_width,
   (void)src_height;
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
-  src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
 #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@@ -334,8 +335,8 @@ static int ScaleUVBilinearDown(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
-                               int src_stride,
-                               int dst_stride,
+                               ptrdiff_t src_stride,
+                               ptrdiff_t dst_stride,
                                const uint8_t* src_uv,
                                uint8_t* dst_uv,
                                int x,
@@ -422,7 +423,7 @@ static int ScaleUVBilinearDown(int src_width,
     }
     for (j = 0; j < dst_height; ++j) {
       int yi = y >> 16;
-      const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
+      const uint8_t* src = src_uv + yi * src_stride;
       if (filtering == kFilterLinear) {
         ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
       } else {
@@ -448,8 +449,8 @@ static int ScaleUVBilinearUp(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                              const uint8_t* src_uv,
                              uint8_t* dst_uv,
                              int x,
@@ -543,7 +544,7 @@ static int ScaleUVBilinearUp(int src_width,
 
   {
     int yi = y >> 16;
-    const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
+    const uint8_t* src = src_uv + yi * src_stride;
 
     // Allocate 2 rows of UV.
     const int row_size = (dst_width * 2 + 15) & ~15;
@@ -552,7 +553,7 @@ static int ScaleUVBilinearUp(int src_width,
       return 1;
 
     uint8_t* rowptr = row;
-    int rowstride = row_size;
+    ptrdiff_t rowstride = row_size;
     int lasty = yi;
 
     ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -570,7 +571,7 @@ static int ScaleUVBilinearUp(int src_width,
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
-          src = src_uv + yi * (intptr_t)src_stride;
+          src = src_uv + yi * src_stride;
         }
         if (yi != lasty) {
           ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -606,8 +607,8 @@ static void ScaleUVLinearUp2(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
-                             int src_stride,
-                             int dst_stride,
+                             ptrdiff_t src_stride,
+                             ptrdiff_t dst_stride,
                              const uint8_t* src_uv,
                              uint8_t* dst_uv) {
   void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
@@ -645,13 +646,12 @@ static void ScaleUVLinearUp2(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
-               dst_width);
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
       dst_uv += dst_stride;
       y += dy;
     }
@@ -727,8 +727,8 @@ static void ScaleUVLinearUp2_16(int src_width,
                                 int src_height,
                                 int dst_width,
                                 int dst_height,
-                                int src_stride,
-                                int dst_stride,
+                                ptrdiff_t src_stride,
+                                ptrdiff_t dst_stride,
                                 const uint16_t* src_uv,
                                 uint16_t* dst_uv) {
   void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
@@ -760,13 +760,12 @@ static void ScaleUVLinearUp2_16(int src_width,
 #endif
 
   if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
-               dst_width);
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
       dst_uv += dst_stride;
       y += dy;
     }
@@ -836,8 +835,8 @@ static void ScaleUVSimple(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
-                          int src_stride,
-                          int dst_stride,
+                          ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride,
                           const uint8_t* src_uv,
                           uint8_t* dst_uv,
                           int x,
@@ -872,8 +871,7 @@ static void ScaleUVSimple(int src_width,
   }
 
   for (j = 0; j < dst_height; ++j) {
-    ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
-                dx);
+    ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
     dst_uv += dst_stride;
     y += dy;
   }
@@ -893,7 +891,7 @@ static int UVCopy(const uint8_t* src_uv,
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
+    src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
 
@@ -913,7 +911,7 @@ static int UVCopy_16(const uint16_t* src_uv,
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
+    src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
 
@@ -951,7 +949,7 @@ static int ScaleUV(const uint8_t* src,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * (intptr_t)src_stride;
+    src = src + (src_height - 1) * (ptrdiff_t)src_stride;
     src_stride = -src_stride;
   }
   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -966,8 +964,8 @@ static int ScaleUV(const uint8_t* src,
   if (clip_y) {
     int64_t clipf = (int64_t)(clip_y)*dy;
     y += (clipf & 0xffff);
-    src += (clipf >> 16) * (intptr_t)src_stride;
-    dst += clip_y * dst_stride;
+    src += (clipf >> 16) * (ptrdiff_t)src_stride;
+    dst += clip_y * (ptrdiff_t)dst_stride;
   }
 
   // Special case for integer step values.
@@ -1007,7 +1005,7 @@ static int ScaleUV(const uint8_t* src,
 #ifdef HAS_UVCOPY
         if (dx == 0x10000 && dy == 0x10000) {
           // Straight copy.
-          UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
+          UVCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 2,
                  src_stride, dst, dst_stride, clip_width, clip_height);
           return 0;
         }
@@ -1100,7 +1098,7 @@ int UVScale_16(const uint16_t* src_uv,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
+    src_uv = src_uv + (src_height - 1) * (ptrdiff_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
   src_width = Abs(src_width);
@@ -1108,13 +1106,16 @@ int UVScale_16(const uint16_t* src_uv,
 #ifdef HAS_UVCOPY
   if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
     if (dst_height == 1) {
-      UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
+      UVCopy_16(src_uv + ((src_height - 1) / 2) * (ptrdiff_t)src_stride_uv,
                 src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
     } else {
       dy = src_height / dst_height;
-      UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
-                (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
-                dst_width, dst_height);
+      if (src_stride_uv > INT_MAX / dy) {
+        return -1;
+      }
+      UVCopy_16(src_uv + ((dy - 1) / 2) * (ptrdiff_t)src_stride_uv,
+                dy * src_stride_uv, dst_uv, dst_stride_uv, dst_width,
+                dst_height);
     }
 
     return 0;
diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc
index b952a6f73..7d38c4d18 100644
--- a/unit_test/scale_plane_test.cc
+++ b/unit_test/scale_plane_test.cc
@@ -42,6 +42,108 @@
 
 namespace libyuv {
 
+// POC: computed in int, row_stride = src_stride * 2 wraps to a small negative
+// value when src_stride is close to INT_MAX, so src_ptr walks backward past
+// the start of the source allocation on the second loop iteration. With
+// src_stride = 0x7FFFFFFE, row_stride = (int)0xFFFFFFFC = -4, so on y=1
+// ScaleRowDown2Box reads 4 bytes before the heap allocation.
+TEST_F(LibYUVScaleTest, ScalePlaneDown2_RowStrideOverflow) {
+  constexpr int kSrcStride = 0x7FFFFFFE;  // INT_MAX - 1
+  constexpr int kSrcW = 64;
+  constexpr int kSrcH = 4;
+  constexpr int kDstW = 32;  // kSrcW / 2
+  constexpr int kDstH = 2;   // kSrcH / 2
+  // src_size = (kSrcH - 1) * kSrcStride + kSrcW, with size_t overflow checks.
+  size_t src_size = kSrcH - 1;
+  if (src_size > SIZE_MAX / kSrcStride) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size *= kSrcStride;
+  if (src_size > SIZE_MAX - kSrcW) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size += kSrcW;
+
+#if defined(__aarch64__)
+  // Infer malloc can accept a large size on CPUs with dot product (a76/a55)
+  int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd);
+#else
+  int has_large_malloc = 1;
+#endif
+  if (!has_large_malloc) {
+    GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes";
+  }
+
+  uint8_t* src = new (std::nothrow) uint8_t[src_size];
+  if (!src) {
+    GTEST_SKIP() << "could not allocate " << src_size << " bytes";
+  }
+  uint8_t dst[kDstW * kDstH];  // Contents are never checked.
+  uint8_t* src_row = src;
+  for (int i = 0; i < kSrcH; i++) {  // Touch only kSrcW bytes per row.
+    memset(src_row, 0x41, kSrcW);
+    src_row += kSrcStride;
+  }
+  // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not
+  // instrument, so they silently read OOB without a report.
+  MaskCpuFlags(1);
+  // 2*dst == src on both axes -> ScalePlane dispatches to ScalePlaneDown2.
+  // In int, row_stride = kSrcStride * 2 wraps to -4; on y=1 src_ptr underflows.
+  ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH,
+             kFilterBox);
+  MaskCpuFlags(0);  // NOTE(review): presumably restores default CPU flags.
+  delete[] src;
+}
+
+// POC: same int overflow in the 1/4 fast path. src_stride = 0x3FFFFFFF gives
+// int row_stride = src_stride * 4 = (int)0xFFFFFFFC = -4.
+TEST_F(LibYUVScaleTest, ScalePlaneDown4_RowStrideOverflow) {
+  constexpr int kSrcStride = 0x3FFFFFFF;  // INT_MAX / 2 (rounded down)
+  constexpr int kSrcW = 64;
+  constexpr int kSrcH = 8;
+  constexpr int kDstW = 16;  // kSrcW / 4
+  constexpr int kDstH = 2;   // kSrcH / 4
+  // src_size = (kSrcH - 1) * kSrcStride + kSrcW, with size_t overflow checks.
+  size_t src_size = kSrcH - 1;
+  if (src_size > SIZE_MAX / kSrcStride) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size *= kSrcStride;
+  if (src_size > SIZE_MAX - kSrcW) {
+    GTEST_SKIP() << "could not represent allocation size in size_t";
+  }
+  src_size += kSrcW;
+
+#if defined(__aarch64__)
+  // Infer malloc can accept a large size on CPUs with dot product (a76/a55)
+  int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd);
+#else
+  int has_large_malloc = 1;
+#endif
+  if (!has_large_malloc) {
+    GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes";
+  }
+
+  uint8_t* src = new (std::nothrow) uint8_t[src_size];
+  if (!src) {
+    GTEST_SKIP() << "could not allocate " << src_size << " bytes";
+  }
+  uint8_t dst[kDstW * kDstH];  // Contents are never checked.
+  uint8_t* src_row = src;
+  for (int i = 0; i < kSrcH; i++) {  // Touch only kSrcW bytes per row.
+    memset(src_row, 0x41, kSrcW);
+    src_row += kSrcStride;
+  }
+  // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not
+  // instrument, so they silently read OOB without a report.
+  MaskCpuFlags(1);
+  // 4*dst == src on both axes with kFilterBox -> ScalePlaneDown4.
+  ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH,
+             kFilterBox);
+  MaskCpuFlags(0);  // NOTE(review): presumably restores default CPU flags.
+  delete[] src;
+}
+
 #ifdef ENABLE_ROW_TESTS
 #ifdef HAS_SCALEROWDOWN2_SSSE3
 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {