diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index aa2c69372..5645a73f3 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -527,6 +527,7 @@ extern "C" {
 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
 #define HAS_ARGBTOAR30ROW_NEON
 #define HAS_ABGRTOAR30ROW_NEON
+#define HAS_CONVERT8TO16ROW_NEON
 #define HAS_I210ALPHATOARGBROW_NEON
 #define HAS_I410ALPHATOARGBROW_NEON
 #define HAS_I210TOARGBROW_NEON
@@ -3786,6 +3787,14 @@ void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr,
                               uint16_t* dst_ptr,
                               int scale,
                               int width);
+void Convert8To16Row_NEON(const uint8_t* src_y,
+                          uint16_t* dst_y,
+                          int scale,
+                          int width);
+void Convert8To16Row_Any_NEON(const uint8_t* src_y,
+                              uint16_t* dst_y,
+                              int scale,
+                              int width);
 
 void Convert16To8Row_C(const uint16_t* src_y,
                        uint8_t* dst_y,
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index c2d4b67a4..7c2785cf2 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -225,6 +225,14 @@ void Convert8To16Plane(const uint8_t* src_y,
     }
   }
 #endif
+#if defined(HAS_CONVERT8TO16ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    Convert8To16Row = Convert8To16Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      Convert8To16Row = Convert8To16Row_NEON;
+    }
+  }
+#endif
 
   // Convert plane
   for (y = 0; y < height; ++y) {
diff --git a/source/row_any.cc b/source/row_any.cc
index 5dac7a9c7..4c19d792a 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -1757,6 +1757,15 @@ ANY11C(Convert8To16Row_Any_AVX2,
        uint16_t,
        31)
 #endif
+#ifdef HAS_CONVERT8TO16ROW_NEON
+ANY11C(Convert8To16Row_Any_NEON,
+       Convert8To16Row_NEON,
+       1,
+       2,
+       uint8_t,
+       uint16_t,
+       15)
+#endif
 #ifdef HAS_MULTIPLYROW_16_AVX2
 ANY11C(MultiplyRow_16_Any_AVX2,
        MultiplyRow_16_AVX2,
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index f14061dec..0fe54c830 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -5582,6 +5582,34 @@ void Convert8To8Row_NEON(const uint8_t* src_y,
       : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5");
 }
 
+// Widens 8-bit samples to 16 bits: dst = (src * 0x0101 * scale) >> 16.
+// scale is a power of two that sets the output bit depth: 1024 = 10 bits.
+void Convert8To16Row_NEON(const uint8_t* src_y,
+                          uint16_t* dst_y,
+                          int scale,
+                          int width) {
+  // zip1/zip2 of a vector with itself doubles every byte, giving src * 0x0101.
+  // Since scale is a power of two, the multiply and >> 16 fold into one
+  // 16-bit shift by log2(scale) - 16, which avoids widening to int32.
+  int shift = 15 - __builtin_clz(scale);  // clz(0) is undefined: scale != 0.
+  asm volatile(
+      "dup    v2.8h, %w[shift]                 \n"  // shift count in all lanes
+      "1:                                      \n"
+      "ldr    q0, [%[src]], #16                \n"  // load 16 source bytes
+      "zip2   v1.16b, v0.16b, v0.16b           \n"  // upper 8 bytes doubled
+      "zip1   v0.16b, v0.16b, v0.16b           \n"  // lower 8 bytes doubled
+      "subs   %w[width], %w[width], #16        \n"  // 16 pixels per iteration
+      "ushl   v1.8h, v1.8h, v2.8h              \n"  // ushl: negative = right
+      "ushl   v0.8h, v0.8h, v2.8h              \n"
+      "stp    q0, q1, [%[dst]], #32            \n"  // store 16 uint16 results
+      "b.ne   1b                               \n"  // exits only at width == 0
+      : [src] "+r"(src_y),   // %[src]
+        [dst] "+r"(dst_y),   // %[dst]
+        [width] "+r"(width)  // %[width]
+      : [shift] "r"(shift)   // %[shift]
+      : "cc", "memory", "v0", "v1", "v2");
+}
+
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
 
 #ifdef __cplusplus