diff --git a/BUILD.gn b/BUILD.gn index 2c9c3e6e4..a72ff0655 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -27,6 +27,10 @@ config("libyuv_config") { if (is_android && current_cpu != "arm64") { ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] } + + if (!libyuv_use_neon) { + defines = [ "LIBYUV_DISABLE_NEON" ] + } } # This target is built when no specific target is specified on the command line. diff --git a/README.chromium b/README.chromium index b6d85b739..8461a1218 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1824 +Version: 1828 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 5c2954f4c..46d371593 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -238,6 +238,23 @@ int I010ToI420(const uint16_t* src_y, int width, int height); +#define H210ToH420 I210ToI420 +LIBYUV_API +int I210ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + #define H210ToH422 I210ToI422 LIBYUV_API int I210ToI422(const uint16_t* src_y, diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 15c7d457d..1ef2256bf 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -991,6 +991,21 @@ int InterpolatePlane(const uint8_t* src0, int height, int interpolation); +// Interpolate between two images using specified amount of interpolation +// (0 to 255) and store to destination. +// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 +// and 255 means 1/256 src0 and 255/256 src1. +LIBYUV_API +int InterpolatePlane_16(const uint16_t* src0, + int src_stride0, // measured in 16 bit pixels + const uint16_t* src1, + int src_stride1, + uint16_t* dst, + int dst_stride, + int width, + int height, + int interpolation); +
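Note (annotation, not part of the patch): a minimal usage sketch for the InterpolatePlane_16 declaration above, blending two 10-bit planes. Buffer names and dimensions are hypothetical; strides are in 16 bit pixels, as the header comment notes.

  #include "libyuv/planar_functions.h"

  enum { kWidth = 320, kHeight = 180 };
  static uint16_t src0[kWidth * kHeight];  // e.g. frame N, 10-bit samples
  static uint16_t src1[kWidth * kHeight];  // e.g. frame N+1
  static uint16_t dst[kWidth * kHeight];

  void BlendHalfway() {
    // interpolation = 128 out of 256 selects an even 50/50 blend.
    libyuv::InterpolatePlane_16(src0, kWidth, src1, kWidth, dst, kWidth,
                                kWidth, kHeight, 128);
  }

// Interpolate between two ARGB images using specified amount of interpolation // Internally calls InterpolatePlane with width * 4 (bpp).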
LIBYUV_API diff --git a/include/libyuv/row.h b/include/libyuv/row.h index e51155c08..f15b58fab 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -111,6 +111,7 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 +#define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 @@ -123,10 +124,10 @@ extern "C" { #define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 -#define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RGB565TOARGBROW_SSE2 #define HAS_RAWTOYROW_SSSE3 +#define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 +#define HAS_RGB565TOARGBROW_SSE2 #define HAS_RGBATOYROW_SSSE3 #if !defined(LIBYUV_BIT_EXACT) #define HAS_RGB24TOYJROW_SSSE3 @@ -169,7 +170,6 @@ extern "C" { #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -#define HAS_INTERPOLATEROW_SSSE3 #define HAS_RGBCOLORTABLEROW_X86 #define HAS_SOBELROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2 @@ -410,8 +410,10 @@ extern "C" { // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) +#define HAS_AB64TOARGBROW_NEON #define HAS_ABGRTOUVROW_NEON #define HAS_ABGRTOYROW_NEON +#define HAS_AR64TOARGBROW_NEON #define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB1555TOUVROW_NEON #define HAS_ARGB1555TOYROW_NEON @@ -420,16 +422,14 @@ extern "C" { #define HAS_ARGB4444TOYROW_NEON #define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON +#define HAS_ARGBTOAB64ROW_NEON +#define HAS_ARGBTOAR64ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTORAWROW_NEON #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOAR64ROW_NEON -#define HAS_ARGBTOAB64ROW_NEON -#define HAS_AR64TOARGBROW_NEON -#define HAS_AB64TOARGBROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON @@ -449,7 +449,6 @@ extern "C" { #define HAS_HALFFLOATROW_NEON #define HAS_HALFMERGEUVROW_NEON #define HAS_I400TOARGBROW_NEON -#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON @@ -459,20 +458,23 @@ extern "C" { #define HAS_I422TORGBAROW_NEON #define HAS_I422TOUYVYROW_NEON #define HAS_I422TOYUY2ROW_NEON +#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_INTERPOLATEROW_16_NEON +#define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON #define HAS_MERGEAR64ROW_NEON #define HAS_MERGEARGB16TO8ROW_NEON #define HAS_MERGEARGBROW_NEON +#define HAS_MERGEUVROW_16_NEON +#define HAS_MERGEUVROW_NEON #define HAS_MERGEXR30ROW_NEON #define HAS_MERGEXR64ROW_NEON #define HAS_MERGEXRGB16TO8ROW_NEON #define HAS_MERGEXRGBROW_NEON -#define HAS_MERGEUVROW_NEON -#define HAS_MERGEUVROW_16_NEON #define HAS_MIRRORROW_NEON -#define HAS_MIRRORUVROW_NEON #define HAS_MIRRORSPLITUVROW_NEON +#define HAS_MIRRORUVROW_NEON #define HAS_MULTIPLYROW_16_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV12TORGB24ROW_NEON @@ -483,13 +485,13 @@ extern "C" { #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTORGBAROW_NEON -#define HAS_RAWTOUVROW_NEON #define HAS_RAWTOUVJROW_NEON +#define HAS_RAWTOUVROW_NEON #define HAS_RAWTOYJROW_NEON #define HAS_RAWTOYROW_NEON #define 
HAS_RGB24TOARGBROW_NEON -#define HAS_RGB24TOUVROW_NEON #define HAS_RGB24TOUVJROW_NEON +#define HAS_RGB24TOUVROW_NEON #define HAS_RGB24TOYJROW_NEON #define HAS_RGB24TOYROW_NEON #define HAS_RGB565TOARGBROW_NEON @@ -500,10 +502,10 @@ extern "C" { #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON #define HAS_SPLITARGBROW_NEON -#define HAS_SPLITXRGBROW_NEON #define HAS_SPLITRGBROW_NEON -#define HAS_SPLITUVROW_NEON #define HAS_SPLITUVROW_16_NEON +#define HAS_SPLITUVROW_NEON +#define HAS_SPLITXRGBROW_NEON #define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON @@ -528,7 +530,6 @@ extern "C" { #define HAS_ARGBSHADEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON -#define HAS_INTERPOLATEROW_NEON #define HAS_SOBELROW_NEON #define HAS_SOBELTOPLANEROW_NEON #define HAS_SOBELXROW_NEON @@ -5203,6 +5204,23 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_16_Any_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); + +void InterpolateRow_16To8_C(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); // Sobel images. void SobelXRow_C(const uint8_t* src_y0, diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index cc1c90619..5c474b0ce 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -200,6 +200,20 @@ void ScalePlaneVertical_16(int src_height, int wpp, enum FilterMode filtering); +void ScalePlaneVertical_16To8(int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int wpp, + int scale, + enum FilterMode filtering); + // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8afef0ed9..8f09ccd2b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1824 +#define LIBYUV_VERSION 1828 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index 502f002d6..162546e5b 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -15,8 +15,9 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/row.h" -#include "libyuv/scale.h" // For ScalePlane() -#include "libyuv/scale_uv.h" // For UVScale() +#include "libyuv/scale.h" // For ScalePlane() +#include "libyuv/scale_row.h" // For FixedDiv +#include "libyuv/scale_uv.h" // For UVScale() #ifdef __cplusplus namespace libyuv { @@ -220,6 +221,55 @@ int I010ToI420(const uint16_t* src_y, 1, 10); } +LIBYUV_API +int I210ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + const int depth = 10; + const int scale = 1 << (24 - depth); + + if (width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + { + const int uv_width = SUBSAMPLE(width, 1, 1); + const int uv_height = SUBSAMPLE(height, 1, 1); + const int dy = FixedDiv(height, uv_height); + + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, + height); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u, + dst_stride_u, src_u, dst_u, 0, 32768, dy, + /*wpp=*/1, scale, kFilterBilinear); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v, + dst_stride_v, src_v, dst_v, 0, 32768, dy, + /*wpp=*/1, scale, kFilterBilinear); + } + return 0; +} + LIBYUV_API int I210ToI422(const uint16_t* src_y, int src_stride_y, diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 9fce8d204..56fe60e49 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -20,7 +20,7 @@ #endif // For ArmCpuCaps() but unittested on all platforms -#include <stdio.h> +#include <stdio.h> // For fopen() #include <string.h> #ifdef __cplusplus diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 141db6efb..b5344862d 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -35,7 +35,7 @@ void CopyPlane(const uint8_t* src_y, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; - if (width == 0 || height == 0) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. @@ -84,8 +84,6 @@ void CopyPlane(const uint8_t* src_y, } } -// TODO(fbarchard): Consider support for negative height. -// TODO(fbarchard): Consider stride measured in bytes. LIBYUV_API void CopyPlane_16(const uint16_t* src_y, int src_stride_y, @@ -93,36 +91,8 @@ void CopyPlane_16(const uint16_t* src_y, int dst_stride_y, int width, int height) { - int y; - void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C; - // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y = dst_stride_y = 0; - } -#if defined(HAS_COPYROW_16_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_16_SSE2; - } -#endif -#if defined(HAS_COPYROW_16_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_16_ERMS; - } -#endif -#if defined(HAS_COPYROW_16_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_16_NEON; - } -#endif - - // Copy plane - for (y = 0; y < height; ++y) { - CopyRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; - } + CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y, + dst_stride_y * 2, width * 2, height); } // Convert a plane of 16 bit data to 8 bit @@ -138,6 +108,9 @@ void Convert16To8Plane(const uint16_t* src_y, void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -196,6 +169,9 @@ void Convert8To16Plane(const uint8_t* src_y, void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; + if (width <= 0 || height == 0) { + return; + }
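Note (annotation, not part of the patch): the CopyPlane_16 rewrite above reuses the optimized 8-bit CopyPlane by reinterpreting each uint16_t sample as two bytes, so the SIMD copy paths are shared rather than duplicated per depth. A plain-C model of the idea, with a hypothetical wrapper name:

  void CopyPlane16ViaBytes(const uint16_t* src, int src_stride,  // strides in pixels
                           uint16_t* dst, int dst_stride,
                           int width, int height) {
    // Width and strides double because CopyPlane counts bytes, not samples.
    libyuv::CopyPlane((const uint8_t*)src, src_stride * 2, (uint8_t*)dst,
                      dst_stride * 2, width * 2, height);
  }

// Negative height means invert the image.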
if (height < 0) { height = -height; @@ -470,6 +446,9 @@ void SplitUVPlane(const uint8_t* src_uv, int y; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -547,6 +526,9 @@ void MergeUVPlane(const uint8_t* src_u, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -626,6 +608,9 @@ void SplitUVPlane_16(const uint16_t* src_uv, void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u, uint16_t* dst_v, int depth, int width) = SplitUVRow_16_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -683,6 +668,9 @@ void MergeUVPlane_16(const uint16_t* src_u, MergeUVRow_16_C; assert(depth >= 8); assert(depth <= 16); + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -735,6 +723,9 @@ void ConvertToMSBPlane_16(const uint16_t* src_y, int scale = 1 << (16 - depth); void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = MultiplyRow_16_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -785,6 +776,9 @@ void ConvertToLSBPlane_16(const uint16_t* src_y, int scale = 1 << depth; void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = DivideRow_16_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -833,6 +827,9 @@ void SwapUVPlane(const uint8_t* src_uv, int y; void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = SwapUVRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -936,6 +933,9 @@ void DetilePlane(const uint8_t* src_y, assert(tile_height > 0); assert(src_stride_y > 0); + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -991,6 +991,9 @@ void DetileSplitUVPlane(const uint8_t* src_uv, assert(tile_height > 0); assert(src_stride_uv > 0); + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -1046,6 +1049,9 @@ void SplitRGBPlane(const uint8_t* src_rgb, int y; void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitRGBRow_C; + if (width <= 0 || height == 0) { + return; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -1105,6 +1111,9 @@ void MergeRGBPlane(const uint8_t* src_r, void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; + if (width <= 0 || height == 0) { + return; + } // Coalesce rows. // Negative height means invert the image. 
if (height < 0) { @@ -3059,6 +3068,10 @@ void SetPlane(uint8_t* dst_y, uint32_t value) { int y; void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; + + if (width <= 0 || height == 0) { + return; + } if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -4005,6 +4018,86 @@ int InterpolatePlane(const uint8_t* src0, return 0; } +// Interpolate 2 planes by specified amount (0 to 255). +LIBYUV_API +int InterpolatePlane_16(const uint16_t* src0, + int src_stride0, + const uint16_t* src1, + int src_stride1, + uint16_t* dst, + int dst_stride, + int width, + int height, + int interpolation) { + int y; + void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_16_C; + if (!src0 || !src1 || !dst || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst = dst + (height - 1) * dst_stride; + dst_stride = -dst_stride; + } + // Coalesce rows. + if (src_stride0 == width && src_stride1 == width && dst_stride == width) { + width *= height; + height = 1; + src_stride0 = src_stride1 = dst_stride = 0; + } +#if defined(HAS_INTERPOLATEROW_16_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow_16 = InterpolateRow_16_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + InterpolateRow_16 = InterpolateRow_16_SSSE3; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + InterpolateRow_16 = InterpolateRow_16_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + InterpolateRow_16 = InterpolateRow_16_Any_NEON; + if (IS_ALIGNED(width, 8)) { + InterpolateRow_16 = InterpolateRow_16_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow_16 = InterpolateRow_16_Any_MSA; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_MSA; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_16_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow_16 = InterpolateRow_16_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow_16 = InterpolateRow_16_LSX; + } + } +#endif + + for (y = 0; y < height; ++y) { + InterpolateRow_16(dst, src0, src1 - src0, width, interpolation); + src0 += src_stride0; + src1 += src_stride1; + dst += dst_stride; + } + return 0; +} + // Interpolate 2 ARGB images by specified amount (0 to 255). LIBYUV_API int ARGBInterpolate(const uint8_t* src_argb0, diff --git a/source/row_any.cc b/source/row_any.cc index 089e518af..3e95b2df4 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -1625,37 +1625,42 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) #undef ANY11C // Any 1 to 1 interpolate. Takes 2 rows of source via stride. 
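Note (annotation, not part of the patch): the hunk below parameterizes ANY11I over the element type T, so a single macro generates the any-width wrappers for both the uint8_t rows and the new uint16_t rows. For reference, the new ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, 1, 1, 7) instantiation expands to roughly this (modulo formatting):

  void InterpolateRow_16_Any_NEON(uint16_t* dst_ptr, const uint16_t* src_ptr,
                                  ptrdiff_t src_stride, int width,
                                  int source_y_fraction) {
    SIMD_ALIGNED(uint16_t temp[64 * 3]);         // 2 padded source rows + 1 output row
    memset(temp, 0, 64 * 2 * sizeof(uint16_t));  // initialize for msan
    int r = width & 7;   // remainder pixels beyond a multiple of 8
    int n = width & ~7;  // largest multiple of the SIMD width (8)
    if (n > 0) {
      InterpolateRow_16_NEON(dst_ptr, src_ptr, src_stride, n, source_y_fraction);
    }
    // Copy the tails of both source rows into the padded buffer, run one
    // full-width SIMD iteration there, then copy back only the valid pixels.
    memcpy(temp, src_ptr + n, r * sizeof(uint16_t));
    memcpy(temp + 64, src_ptr + src_stride + n, r * sizeof(uint16_t));
    InterpolateRow_16_NEON(temp + 128, temp, 64, 8, source_y_fraction);
    memcpy(dst_ptr + n, temp + 128, r * sizeof(uint16_t));
  }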
-#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ - void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, \ - int width, int source_y_fraction) { \ - SIMD_ALIGNED(uint8_t temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP); \ - ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ +#define ANY11I(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ + void NAMEANY(T* dst_ptr, const T* src_ptr, ptrdiff_t src_stride, int width, \ + int source_y_fraction) { \ + SIMD_ALIGNED(T temp[64 * 3]); \ + memset(temp, 0, 64 * 2 * sizeof(T)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP * sizeof(T)); \ + memcpy(temp + 64, src_ptr + src_stride + n * SBPP, r * SBPP * sizeof(T)); \ + ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP * sizeof(T)); \ } #ifdef HAS_INTERPOLATEROW_AVX2 -ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) +ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_SSSE3 -ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) +ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, uint8_t, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_NEON -ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) +ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_MSA -ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) +ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_LSX -ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, 1, 1, 31) +ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, 1, 1, 31) #endif + +#ifdef HAS_INTERPOLATEROW_16_NEON +ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, 1, 1, 7) +#endif + #undef ANY11I // Any 1 to 1 mirror. diff --git a/source/row_common.cc b/source/row_common.cc index 8cf826ec5..3bfc56180 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -11,7 +11,6 @@ #include "libyuv/row.h" #include <assert.h> -#include <stdio.h> #include <string.h> // For memcpy and memset. #include "libyuv/basic_types.h" @@ -3402,6 +3401,18 @@ static void HalfRow_16_C(const uint16_t* src_uv, } } +static void HalfRow_16To8_C(const uint16_t* src_uv, + ptrdiff_t src_uv_stride, + uint8_t* dst_uv, + int scale, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_uv[x] = clamp255( + (((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1) * scale) >> 16); + } +} + // C version 2x2 -> 2x1. void InterpolateRow_C(uint8_t* dst_ptr, const uint8_t* src_ptr, @@ -3435,6 +3446,51 @@ void InterpolateRow_C(uint8_t* dst_ptr, } }
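Note (annotation, not part of the patch): the InterpolateRow_16To8_C function added below does two things per pixel: it blends the two source rows with 8-bit fractions, then rescales the still lsb-justified 16-bit result into an 8-bit msb value via the caller-supplied scale. A worked example with 10-bit input, where scale = 1 << (24 - 10) = 16384:

  // blended = (v0 * (256 - f) + v1 * f) >> 8      // still in 10-bit range
  // out8    = clamp255((blended * 16384) >> 16)   // 10-bit -> 8-bit
  // e.g. v0 = 1023, v1 = 1023, f = 64:
  //   blended = (1023 * 192 + 1023 * 64) >> 8 = 1023
  //   out8    = (1023 * 16384) >> 16 = 255

+// C version 2x2 16 bit -> 2x1 8 bit.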
+// Use scale to convert lsb formats to msb, depending on how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void InterpolateRow_16To8_C(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16_t* src_ptr1 = src_ptr + src_stride; + int x; + if (source_y_fraction == 0) { + Convert16To8Row_C(src_ptr, dst_ptr, scale, width); + return; + } + if (source_y_fraction == 128) { + HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width); + return; + } + for (x = 0; x < width - 1; x += 2) { + dst_ptr[0] = clamp255( + (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) * + scale) >> + 16); + dst_ptr[1] = clamp255( + (((src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8) * + scale) >> + 16); + src_ptr += 2; + src_ptr1 += 2; + dst_ptr += 2; + } + if (width & 1) { + dst_ptr[0] = clamp255( + (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) * + scale) >> + 16); + } +} + void InterpolateRow_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, diff --git a/source/row_neon.cc b/source/row_neon.cc index 8ba71d07e..297ccceb6 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -10,8 +10,6 @@ #include "libyuv/row.h" -#include <stdio.h> - #ifdef __cplusplus namespace libyuv { extern "C" { @@ -21,6 +19,8 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) +// d8-d15, r4-r11, r14(lr) need to be preserved if used. r13(sp), r15(pc) are reserved. + // q0: Y uint16x8_t // d2: U uint8x8_t // d3: V uint8x8_t @@ -2715,6 +2715,66 @@ void InterpolateRow_NEON(uint8_t* dst_ptr, : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"); } +// Bilinear filter 8x2 -> 8x1 +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16_t* src_ptr1 = src_ptr + src_stride; + + asm volatile( + "cmp %4, #0 \n" + "beq 100f \n" + "cmp %4, #128 \n" + "beq 50f \n" + + "vdup.16 d17, %4 \n" + "vdup.16 d16, %5 \n" + // General purpose row blend. + "1: \n" + "vld1.16 {q0}, [%1]! \n" + "vld1.16 {q1}, [%2]! \n" + "subs %3, %3, #8 \n" + "vmull.u16 q2, d0, d16 \n" + "vmull.u16 q3, d1, d16 \n" + "vmlal.u16 q2, d2, d17 \n" + "vmlal.u16 q3, d3, d17 \n" + "vrshrn.u32 d0, q2, #8 \n" + "vrshrn.u32 d1, q3, #8 \n" + "vst1.16 {q0}, [%0]! \n" + "bgt 1b \n" + "b 99f \n" + + // Blend 50 / 50. + "50: \n" + "vld1.16 {q0}, [%1]! \n" + "vld1.16 {q1}, [%2]! \n" + "subs %3, %3, #8 \n" + "vrhadd.u16 q0, q1 \n" + "vst1.16 {q0}, [%0]! \n" + "bgt 50b \n" + "b 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "vld1.16 {q0}, [%1]! \n" + "subs %3, %3, #8 \n" + "vst1.16 {q0}, [%0]! \n" + "bgt 100b \n" + + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_ptr1), // %2 + "+r"(dst_width) // %3 + : "r"(y1_fraction), // %4 + "r"(y0_fraction) // %5 + : "cc", "memory", "q0", "q1", "q2", "q3", "q8"); +} + // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, @@ -3666,7 +3726,7 @@ void Convert16To8Row_NEON(const uint16_t* src_y, "vqdmulh.s16 q1, q1, q2 \n" "vqshrn.u16 d0, q0, #1 \n" "vqshrn.u16 d1, q1, #1 \n" - "vst1.16 {q0}, [%1]! \n" + "vst1.8 {q0}, [%1]! 
\n" "subs %3, %3, #16 \n" // 16 src pixels per loop "bgt 1b \n" : "+r"(src_y), // %0 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 8d43d5940..6135014b7 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -2966,6 +2966,71 @@ void InterpolateRow_NEON(uint8_t* dst_ptr, : "cc", "memory", "v0", "v1", "v3", "v4", "v5"); } +// Bilinear filter 8x2 -> 8x1 +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { + int y1_fraction = source_y_fraction; + int y0_fraction = 256 - y1_fraction; + const uint16_t* src_ptr1 = src_ptr + src_stride; + + asm volatile( + "cmp %w4, #0 \n" + "b.eq 100f \n" + "cmp %w4, #128 \n" + "b.eq 50f \n" + + "dup v5.8h, %w4 \n" + "dup v4.8h, %w5 \n" + // General purpose row blend. + "1: \n" + "ld1 {v0.8h}, [%1], #16 \n" + "ld1 {v1.8h}, [%2], #16 \n" + "subs %w3, %w3, #8 \n" + "umull v2.4s, v0.4h, v4.4h \n" + "prfm pldl1keep, [%1, 448] \n" + "umull2 v3.4s, v0.8h, v4.8h \n" + "prfm pldl1keep, [%2, 448] \n" + "umlal v2.4s, v1.4h, v5.4h \n" + "umlal2 v3.4s, v1.8h, v5.8h \n" + "rshrn v0.4h, v2.4s, #8 \n" + "rshrn2 v0.8h, v3.4s, #8 \n" + "st1 {v0.8h}, [%0], #16 \n" + "b.gt 1b \n" + "b 99f \n" + + // Blend 50 / 50. + "50: \n" + "ld1 {v0.8h}, [%1], #16 \n" + "ld1 {v1.8h}, [%2], #16 \n" + "subs %w3, %w3, #8 \n" + "prfm pldl1keep, [%1, 448] \n" + "urhadd v0.8h, v0.8h, v1.8h \n" + "prfm pldl1keep, [%2, 448] \n" + "st1 {v0.8h}, [%0], #16 \n" + "b.gt 50b \n" + "b 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "ld1 {v0.8h}, [%1], #16 \n" + "subs %w3, %w3, #8 \n" + "prfm pldl1keep, [%1, 448] \n" + "st1 {v0.8h}, [%0], #16 \n" + "b.gt 100b \n" + + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_ptr1), // %2 + "+r"(dst_width) // %3 + : "r"(y1_fraction), // %4 + "r"(y0_fraction) // %5 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"); +} + // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, diff --git a/source/scale.cc b/source/scale.cc index ad573ef6a..ac009310f 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -29,6 +29,7 @@ static __inline int Abs(int v) { } #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) +#define CENTERSTART(dx, s) (dx < 0) ? 
-((-dx >> 1) + s) : ((dx >> 1) + s) // Scale plane, 1/2 // This is an optimized version for scaling down a plane to 1/2 of @@ -1154,7 +1155,7 @@ void ScalePlaneBilinearDown_16(int src_width, #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_Any_16_SSE2; + InterpolateRow = InterpolateRow_16_Any_SSE2; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } @@ -1162,7 +1163,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_16_SSSE3; + InterpolateRow = InterpolateRow_16_Any_SSSE3; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } @@ -1170,7 +1171,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_16_AVX2; + InterpolateRow = InterpolateRow_16_Any_AVX2; if (IS_ALIGNED(src_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } @@ -1178,7 +1179,7 @@ void ScalePlaneBilinearDown_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_16_NEON; + InterpolateRow = InterpolateRow_16_Any_NEON; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } @@ -1706,7 +1707,7 @@ void ScalePlaneBilinearUp_16(int src_width, #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_Any_16_SSE2; + InterpolateRow = InterpolateRow_16_Any_SSE2; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } @@ -1714,7 +1715,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_16_SSSE3; + InterpolateRow = InterpolateRow_16_Any_SSSE3; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } @@ -1722,7 +1723,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_16_AVX2; + InterpolateRow = InterpolateRow_16_Any_AVX2; if (IS_ALIGNED(dst_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } @@ -1730,7 +1731,7 @@ void ScalePlaneBilinearUp_16(int src_width, #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_16_NEON; + InterpolateRow = InterpolateRow_16_Any_NEON; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } @@ -1886,7 +1887,6 @@ static void ScalePlaneSimple_16(int src_width, // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. - LIBYUV_API void ScalePlane(const uint8_t* src, int src_stride, @@ -1916,10 +1916,19 @@ void ScalePlane(const uint8_t* src, return; } if (dst_width == src_width && filtering != kFilterBox) { - int dy = FixedDiv(src_height, dst_height); + int dy = 0; + int y = 0; + // When scaling down, use the center 2 rows to filter. + // When scaling up, last row of destination uses the last 2 source rows. + if (dst_height <= src_height) { + dy = FixedDiv(src_height, dst_height); + y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. + } else if (src_height > 1 && dst_height > 1) { + dy = FixedDiv1(src_height, dst_height); + } // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst, 0, 0, dy, /*bpp=*/1, filtering); + dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { @@ -2010,10 +2019,22 @@ void ScalePlane_16(const uint16_t* src, return; } if (dst_width == src_width && filtering != kFilterBox) { - int dy = FixedDiv(src_height, dst_height); + int dy = 0; + int y = 0; + // When scaling down, use the center 2 rows to filter. + // When scaling up, last row of destination uses the last 2 source rows. + if (dst_height <= src_height) { + dy = FixedDiv(src_height, dst_height); + y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter. + // When scaling up, the last row of the destination uses the last + // source row. Skip FixedDiv1 when either dimension is 1 to avoid a + // divide by zero; dy stays 0 and no vertical scaling is done. + } else if (src_height > 1 && dst_height > 1) { + dy = FixedDiv1(src_height, dst_height); + } // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, - dst_stride, src, dst, 0, 0, dy, /*bpp=*/1, filtering); + dst_stride, src, dst, 0, y, dy, /*wpp=*/1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { diff --git a/source/scale_common.cc b/source/scale_common.cc index d54ab9423..e0c821c27 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -1533,6 +1533,7 @@ void ScalePlaneVertical(int src_height, y += dy; } } + void ScalePlaneVertical_16(int src_height, int dst_width, int dst_height, @@ -1543,7 +1544,7 @@ void ScalePlaneVertical_16(int src_height, int x, int y, int dy, - int wpp, + int wpp, /* words per pixel, normally 1 */ enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; @@ -1559,32 +1560,32 @@ void ScalePlaneVertical_16(int src_height, src_argb += (x >> 16) * wpp; #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - InterpolateRow = InterpolateRow_Any_16_SSE2; - if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_Any_SSE2; + if (IS_ALIGNED(dst_width_words, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } } #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_16_SSSE3; - if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_Any_SSSE3; + if (IS_ALIGNED(dst_width_words, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_16_AVX2; - if (IS_ALIGNED(dst_width_bytes, 32)) { + InterpolateRow = InterpolateRow_16_Any_AVX2; + if (IS_ALIGNED(dst_width_words, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_16_NEON; - if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_16_Any_NEON; + if (IS_ALIGNED(dst_width_words, 8)) { InterpolateRow = InterpolateRow_16_NEON; } } @@ -1604,6 +1605,48 @@ void ScalePlaneVertical_16(int src_height, } } +void ScalePlaneVertical_16To8(int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int wpp, /* words per pixel, normally 1 */ + int scale, + enum FilterMode filtering) {
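Note (annotation, not part of the patch): vertical position and step are 16.16 fixed point throughout these scalers: yi = y >> 16 picks the top source row and yf = (y >> 8) & 255 is the blend fraction toward the next row. A worked example for ScalePlaneVertical_16To8, scaling 4 source rows to 2 with the centered start used by I210ToI420:

  // dy = FixedDiv(4, 2) = 131072 (2.0); y starts at dy / 2 - 32768 = 32768 (0.5)
  // dst row 0: yi = 32768 >> 16 = 0, yf = 128 -> 50/50 blend of source rows 0 and 1
  // dst row 1: y += dy -> 163840, yi = 2, yf = 128 -> 50/50 blend of rows 2 and 3

+ // TODO(fbarchard): Allow higher wpp.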
+ int dst_width_words = dst_width * wpp; + // TODO(https://crbug.com/libyuv/931): Add NEON and AVX2 versions. + void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb, + ptrdiff_t src_stride, int scale, int dst_width, + int source_y_fraction) = InterpolateRow_16To8_C; + const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; + int j; + assert(wpp >= 1 && wpp <= 2); + assert(src_height != 0); + assert(dst_width > 0); + assert(dst_height > 0); + src_argb += (x >> 16) * wpp; + + for (j = 0; j < dst_height; ++j) { + int yi; + int yf; + if (y > max_y) { + y = max_y; + } + yi = y >> 16; + yf = filtering ? ((y >> 8) & 255) : 0; + InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride, + scale, dst_width_words, yf); + dst_argb += dst_stride; + y += dy; + } +} + // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, @@ -1653,7 +1696,7 @@ int FixedDiv_C(int num, int div) { return (int)(((int64_t)(num) << 16) / div); } -// Divide num by div and return as 16.16 fixed point result. +// Divide num - 1 by div - 1 and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div) { return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); } @@ -1696,14 +1739,14 @@ void ScaleSlope(int src_width, if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_width > 1) { + } else if (src_width > 1 && dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } if (dst_height <= src_height) { *dy = FixedDiv(src_height, dst_height); *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_height > 1) { + } else if (src_height > 1 && dst_height > 1) { *dy = FixedDiv1(src_height, dst_height); *y = 0; } @@ -1712,7 +1755,7 @@ void ScaleSlope(int src_width, if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. 
- } else if (dst_width > 1) { + } else if (src_width > 1 && dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 185c5aa44..c2d952200 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -178,6 +178,7 @@ TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10) TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12) TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10) +TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10) TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10) TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10) TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12) @@ -2270,7 +2271,8 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { free_aligned_buffer_page_end(dst_vu); } -TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { +// TODO(fbarchard): Improve test to compare against I422, not checksum +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); @@ -2294,13 +2296,13 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_422) { uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_uv_hash, 3543430771u); + EXPECT_EQ(dst_uv_hash, 493520167u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); } -TEST_F(LibYUVConvertTest, TestMJPGToNV12_422) { +TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); @@ -2327,7 +2329,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_422) { half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); EXPECT_EQ(dst_y_hash, 2682851208u); - EXPECT_EQ(dst_vu_hash, 3543430771u); + EXPECT_EQ(dst_vu_hash, 493520167u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 8f5a33cb1..42166d0d9 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1080,6 +1080,87 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { } } +TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { + SIMD_ALIGNED(uint16_t orig_pixels_0[1280]); + SIMD_ALIGNED(uint16_t orig_pixels_1[1280]); + SIMD_ALIGNED(uint16_t interpolate_pixels[1280]); + memset(orig_pixels_0, 0, sizeof(orig_pixels_0)); + memset(orig_pixels_1, 0, sizeof(orig_pixels_1)); + + orig_pixels_0[0] = 16u; + orig_pixels_0[1] = 32u; + orig_pixels_0[2] = 64u; + orig_pixels_0[3] = 128u; + orig_pixels_0[4] = 0u; + orig_pixels_0[5] = 0u; + orig_pixels_0[6] = 0u; + orig_pixels_0[7] = 255u; + orig_pixels_0[8] = 0u; + orig_pixels_0[9] = 0u; + orig_pixels_0[10] = 0u; + orig_pixels_0[11] = 0u; + orig_pixels_0[12] = 0u; + orig_pixels_0[13] = 0u; + orig_pixels_0[14] = 0u; + orig_pixels_0[15] = 0u; + + orig_pixels_1[0] = 0u; + orig_pixels_1[1] = 0u; + orig_pixels_1[2] = 0u; + orig_pixels_1[3] = 0u; + orig_pixels_1[4] = 0u; + orig_pixels_1[5] = 0u; + orig_pixels_1[6] = 0u; + orig_pixels_1[7] = 0u; + orig_pixels_1[8] = 0u; + orig_pixels_1[9] = 0u; + orig_pixels_1[10] = 0u; + orig_pixels_1[11] = 0u; + orig_pixels_1[12] = 255u; + orig_pixels_1[13] = 255u; + orig_pixels_1[14] = 255u; + orig_pixels_1[15] = 255u; + 
+ InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 128); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 0); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); + + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 16, 1, 192); + + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); + + for (int i = 0; i < benchmark_pixels_div1280_; ++i) { + InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, + &interpolate_pixels[0], 0, 1280, 1, 123); + } +} + #define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \ N, NEG, OFF) \ TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \ @@ -1484,9 +1565,43 @@ TEST_F(LibYUVPlanarTest, TestCopyPlane) { EXPECT_EQ(0, err); } -TEST_F(LibYUVPlanarTest, TestCopyPlaneZeroDimensionRegressionTest) { - // Regression test to verify copying a rect with a zero height or width does - // not lead to memory corruption. +TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { + int i; + int y_plane_size = benchmark_width_ * benchmark_height_; + align_buffer_page_end(orig_y, y_plane_size); + align_buffer_page_end(dst_c, y_plane_size); + align_buffer_page_end(dst_opt, y_plane_size); + + MemRandomize(orig_y, y_plane_size); + memset(dst_c, 1, y_plane_size); + memset(dst_opt, 2, y_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + // Enable optimizations. + MaskCpuFlags(benchmark_cpu_info_); + for (i = 0; i < benchmark_iterations_; i++) { + CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_, + benchmark_width_, benchmark_height_); + } + + for (i = 0; i < y_plane_size; ++i) { + EXPECT_EQ(dst_c[i], dst_opt[i]); + } + + free_aligned_buffer_page_end(orig_y); + free_aligned_buffer_page_end(dst_c); + free_aligned_buffer_page_end(dst_opt); +} + +TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { + // Test to verify copying a rect with a zero height or width does + // not touch destination memory. 
uint8_t src = 42; uint8_t dst = 0; @@ -3509,8 +3624,8 @@ TEST_F(LibYUVPlanarTest, YUY2ToY) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, - benchmark_width_, benchmark_width_, benchmark_height_); + YUY2ToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { @@ -3538,8 +3653,8 @@ TEST_F(LibYUVPlanarTest, UYVYToY) { memset(dst_pixels_y_c, 1, kPixels); MaskCpuFlags(disable_cpu_flags_); - UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, - benchmark_width_, benchmark_width_, benchmark_height_); + UYVYToY(src_pixels_y, benchmark_width_ * 2, dst_pixels_y_c, benchmark_width_, + benchmark_width_, benchmark_height_); MaskCpuFlags(benchmark_cpu_info_); for (int i = 0; i < benchmark_iterations_; ++i) { diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 1fb3b2f0a..81c839f41 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -1545,4 +1545,57 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { free_aligned_buffer_page_end(orig_pixels); } +TEST_F(LibYUVScaleTest, PlaneTest1_Box) { + align_buffer_page_end(orig_pixels, 3); + align_buffer_page_end(dst_pixels, 3); + + // Pad the 1x1 byte image with invalid values before and after in case libyuv + // reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, + libyuv::kFilterBox); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels); + free_aligned_buffer_page_end(orig_pixels); +} + +TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { + align_buffer_page_end(orig_pixels_alloc, 3 * 2); + align_buffer_page_end(dst_pixels_alloc, 3 * 2); + uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc; + uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc; + + // Pad the 1x1 16 bit image with invalid values before and after in case + // libyuv reads outside the memory boundaries. + orig_pixels[0] = 0; + orig_pixels[1] = 1; // scale this pixel + orig_pixels[2] = 2; + dst_pixels[0] = 3; + dst_pixels[1] = 3; + dst_pixels[2] = 3; + + libyuv::ScalePlane_16( + orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1, + /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, + /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); + + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); + + free_aligned_buffer_page_end(dst_pixels_alloc); + free_aligned_buffer_page_end(orig_pixels_alloc); +} } // namespace libyuv
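Note (annotation, not part of the patch): an end-to-end usage sketch for the I210ToI420 conversion this change introduces (10-bit 4:2:2 to 8-bit 4:2:0; Y is converted with Convert16To8Plane and chroma is vertically downsampled with ScalePlaneVertical_16To8). Buffer names and dimensions are hypothetical; 16-bit plane strides are counted in uint16_t elements.

  #include "libyuv/convert.h"

  enum { kW = 320, kH = 180 };
  static uint16_t src_y[kW * kH];             // I210: 10-bit samples,
  static uint16_t src_u[(kW / 2) * kH];       // chroma subsampled horizontally only
  static uint16_t src_v[(kW / 2) * kH];
  static uint8_t dst_y[kW * kH];              // I420: 8-bit samples,
  static uint8_t dst_u[(kW / 2) * (kH / 2)];  // chroma subsampled both directions
  static uint8_t dst_v[(kW / 2) * (kH / 2)];

  int ConvertFrame() {
    // Returns 0 on success; pass -kH as the height to flip vertically.
    return libyuv::I210ToI420(src_y, kW, src_u, kW / 2, src_v, kW / 2,
                              dst_y, kW, dst_u, kW / 2, dst_v, kW / 2,
                              kW, kH);
  }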