diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 0b1e01744..5ccf94d9d 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -271,7 +271,7 @@ extern "C" {
 #define HAS_I422TOARGBROW_SSSE3
 #endif
 
-// The following are available forr gcc/clang x86 platforms:
+// The following are available for gcc/clang x86 platforms:
 // TODO(fbarchard): Port to Visual C
 #if !defined(LIBYUV_DISABLE_X86) && \
     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
@@ -279,6 +279,14 @@ extern "C" {
 #define HAS_SPLITRGBROW_SSSE3
 #endif
 
+// The following are available for AVX2 gcc/clang x86 platforms:
+// TODO(fbarchard): Port to Visual C
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
+    (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_MERGEUV10ROW_AVX2
+#endif
+
 // The following are available on Neon platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && \
     (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
@@ -1523,6 +1531,15 @@ void MergeRGBRow_Any_NEON(const uint8* src_r,
                           uint8* dst_rgb,
                           int width);
 
+void MergeUV10Row_C(const uint16* src_u,
+                    const uint16* src_v,
+                    uint16* dst_uv,
+                    int width);
+void MergeUV10Row_AVX2(const uint16* src_u,
+                       const uint16* src_v,
+                       uint16* dst_uv,
+                       int width);
+
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
 void CopyRow_AVX(const uint8* src, uint8* dst, int count);
 void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
diff --git a/source/row_common.cc b/source/row_common.cc
index 2d01a789b..c3294ece5 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1798,6 +1798,24 @@ void MergeRGBRow_C(const uint8* src_r,
   }
 }
 
+void MergeUV10Row_C(const uint16* src_u,
+                    const uint16* src_v,
+                    uint16* dst_uv,
+                    int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_uv[0] = src_u[x] << 6;
+    dst_uv[1] = src_v[x] << 6;
+    dst_uv[2] = src_u[x + 1] << 6;
+    dst_uv[3] = src_v[x + 1] << 6;
+    dst_uv += 4;
+  }
+  if (width & 1) {
+    dst_uv[0] = src_u[width - 1] << 6;
+    dst_uv[1] = src_v[width - 1] << 6;
+  }
+}
+
 void CopyRow_C(const uint8* src, uint8* dst, int count) {
   memcpy(dst, src, count);
 }
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 86f0880be..3af320454 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -2753,6 +2753,48 @@ void MergeUVRow_SSE2(const uint8* src_u,
 }
 #endif  // HAS_MERGEUVROW_SSE2
 
+#ifdef HAS_MERGEUV10ROW_AVX2
+void MergeUV10Row_AVX2(const uint16* src_u,
+                       const uint16* src_v,
+                       uint16* dst_uv,
+                       int width) {
+  asm volatile(
+      "sub          %0,%1                        \n"
+
+      LABELALIGN
+      "1:                                        \n"
+      "vmovdqu      (%0),%%ymm0                  \n"
+      "vmovdqu      (%0,%1,1),%%ymm1             \n"
+      "add          $0x20,%0                     \n"
+      "vpsllw       $0x6,%%ymm0,%%ymm0           \n"
+      "vpsllw       $0x6,%%ymm1,%%ymm1           \n"
+      // "vpermq       $0xd8,%%ymm0,%%ymm0          \n"
+      // "vpermq       $0xd8,%%ymm1,%%ymm1          \n"
+      "vpunpcklwd   %%ymm1,%%ymm0,%%ymm2         \n"
+      "vpunpckhwd   %%ymm1,%%ymm0,%%ymm0         \n"
+
+      // "vmovdqu      %%ymm2,(%2)                  \n"
+      // "vmovdqu      %%ymm0,0x20(%2)              \n"
+
+      "vextractf128 $0x0,%%ymm2,(%2)             \n"
+      "vextractf128 $0x0,%%ymm0,0x10(%2)         \n"
+      "vextractf128 $0x1,%%ymm2,0x20(%2)         \n"
+      "vextractf128 $0x1,%%ymm0,0x30(%2)         \n"
+      "add          $0x40,%2                     \n"
+      "sub          $0x10,%3                     \n"
+      "jg           1b                           \n"
+      "vzeroupper                                \n"
+      : "+r"(src_u),   // %0
+        "+r"(src_v),   // %1
+        "+r"(dst_uv),  // %2
+        "+r"(width)    // %3
+      :
+      : "memory", "cc", "xmm0", "xmm1", "xmm2"
+  );
+}
+#endif  // HAS_MERGEUV10ROW_AVX2
+
+
 #ifdef HAS_SPLITRGBROW_SSSE3
 // Shuffle table for converting RGB to Planar.
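Note on the AVX2 store pattern: vpunpcklwd/vpunpckhwd on ymm registers interleave within each 128-bit lane rather than across the whole register, so after the unpacks ymm2 holds pixels 0-3 and 8-11 while ymm0 holds pixels 4-7 and 12-15. The four vextractf128 stores write those lanes back in pixel order; the commented-out vpermq $0xd8 shuffles followed by two full-width vmovdqu stores are the equivalent alternative. The sketch below is an illustrative intrinsics rendering of the same 16-pixels-per-iteration loop, assuming width is a multiple of 16; the function name is hypothetical and not part of this patch.

#include <immintrin.h>
#include <stdint.h>

// Illustrative only: mirrors the data movement of MergeUV10Row_AVX2 above.
static void MergeUV10Row_Intrinsics(const uint16_t* src_u,
                                    const uint16_t* src_v,
                                    uint16_t* dst_uv,
                                    int width) {
  for (int x = 0; x < width; x += 16) {
    // Load 16 U and 16 V samples, shift the 10-bit values into the top bits.
    __m256i u = _mm256_slli_epi16(
        _mm256_loadu_si256((const __m256i*)(src_u + x)), 6);
    __m256i v = _mm256_slli_epi16(
        _mm256_loadu_si256((const __m256i*)(src_v + x)), 6);
    // Per-lane interleave: lo = pixels 0-3 | 8-11, hi = pixels 4-7 | 12-15.
    __m256i lo = _mm256_unpacklo_epi16(u, v);
    __m256i hi = _mm256_unpackhi_epi16(u, v);
    // Store the four 128-bit lanes in pixel order (the vextractf128 pattern).
    _mm_storeu_si128((__m128i*)(dst_uv + 2 * x),
                     _mm256_castsi256_si128(lo));
    _mm_storeu_si128((__m128i*)(dst_uv + 2 * x + 8),
                     _mm256_castsi256_si128(hi));
    _mm_storeu_si128((__m128i*)(dst_uv + 2 * x + 16),
                     _mm256_extracti128_si256(lo, 1));
    _mm_storeu_si128((__m128i*)(dst_uv + 2 * x + 24),
                     _mm256_extracti128_si256(hi, 1));
  }
}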
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 04591fbcf..1cbd13f8b 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -2617,6 +2617,48 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
   free_aligned_buffer_page_end(dst_pixels_c);
 }
 
+// TODO(fbarchard): improve test for platforms and cpu detect
+#ifdef HAS_MERGEUV10ROW_AVX2
+TEST_F(LibYUVPlanarTest, MergeUV10Row_Opt) {
+  const int kPixels = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(src_pixels_u, kPixels * 2);
+  align_buffer_page_end(src_pixels_v, kPixels * 2);
+  align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
+  align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2);
+
+  MemRandomize(src_pixels_u, kPixels * 2);
+  MemRandomize(src_pixels_v, kPixels * 2);
+  memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2);
+  memset(dst_pixels_uv_c, 1, kPixels * 2 * 2);
+
+  MergeUV10Row_C(reinterpret_cast<const uint16*>(src_pixels_u),
+                 reinterpret_cast<const uint16*>(src_pixels_v),
+                 reinterpret_cast<uint16*>(dst_pixels_uv_c), kPixels);
+
+  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    if (has_avx2) {
+      MergeUV10Row_AVX2(reinterpret_cast<const uint16*>(src_pixels_u),
+                        reinterpret_cast<const uint16*>(src_pixels_v),
+                        reinterpret_cast<uint16*>(dst_pixels_uv_opt), kPixels);
+    } else {
+      MergeUV10Row_C(reinterpret_cast<const uint16*>(src_pixels_u),
+                     reinterpret_cast<const uint16*>(src_pixels_v),
+                     reinterpret_cast<uint16*>(dst_pixels_uv_opt), kPixels);
+    }
+  }
+
+  for (int i = 0; i < kPixels * 2 * 2; ++i) {
+    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
+  }
+
+  free_aligned_buffer_page_end(src_pixels_u);
+  free_aligned_buffer_page_end(src_pixels_v);
+  free_aligned_buffer_page_end(dst_pixels_uv_opt);
+  free_aligned_buffer_page_end(dst_pixels_uv_c);
+}
+#endif
+
 float TestScaleMaxSamples(int benchmark_width,
                           int benchmark_height,
                           int benchmark_iterations,
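For reference, both paths produce a P010-style layout: each 10-bit sample is shifted into the top of its 16-bit word (bits 6-15), with U and V interleaved. Below is a minimal standalone check of the C reference behavior, assuming the declaration added to include/libyuv/row.h is visible; the driver itself is hypothetical and not part of this patch.

#include <assert.h>
#include "libyuv/row.h"

int main() {
  const uint16 u[3] = {0x001, 0x200, 0x3FF};  // 10-bit input samples
  const uint16 v[3] = {0x3FF, 0x100, 0x000};
  uint16 uv[6] = {0};
  MergeUV10Row_C(u, v, uv, 3);  // odd width exercises the tail pixel
  assert(uv[0] == 0x0040 && uv[1] == 0xFFC0);  // 0x001 << 6, 0x3FF << 6
  assert(uv[2] == 0x8000 && uv[3] == 0x4000);  // 0x200 << 6, 0x100 << 6
  assert(uv[4] == 0xFFC0 && uv[5] == 0x0000);  // last (odd) pixel written once
  return 0;
}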