diff --git a/README.chromium b/README.chromium
index 565a88ed9..40188988c 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 146
+Version: 147
 License: BSD
 License File: LICENSE
 
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 35b250477..4051e7e54 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -278,6 +278,74 @@ int I420Copy(const uint8* src_y, int src_stride_y,
   return 0;
 }
 
+// Mirror a plane of data
+void MirrorPlane(const uint8* src_y, int src_stride_y,
+                 uint8* dst_y, int dst_stride_y,
+                 int width, int height) {
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_NEON;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
+    MirrorRow = MirrorRow_SSSE3;
+  } else
+#endif
+#if defined(HAS_MIRRORROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_SSE2;
+  } else
+#endif
+  {
+    MirrorRow = MirrorRow_C;
+  }
+
+  // Mirror plane
+  for (int y = 0; y < height; ++y) {
+    MirrorRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
+// Mirror I420 with optional flipping
+int I420Mirror(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  if (!src_y || !src_u || !src_v ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    int halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (dst_y) {
+    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+  return 0;
+}
+
 // Copy ARGB with optional flipping
 int ARGBCopy(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
@@ -299,89 +367,6 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
   return 0;
 }
 
-int I420Mirror(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 32)) {
-    ReverseRow = ReverseRow_NEON;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
-  } else
-#endif
-#if defined(HAS_REVERSE_ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
-      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-    ReverseRow = ReverseRow_SSE2;
-  } else
-#endif
-  {
-    ReverseRow = ReverseRow_C;
-  }
-
-  // Y Plane
-  int y;
-  for (y = 0; y < height; ++y) {
-    ReverseRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-  // U Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_u, dst_u, halfwidth);
-    src_u += src_stride_u;
-    dst_u += dst_stride_u;
-  }
-  // V Plane
-  for (y = 0; y < halfheight; ++y) {
-    ReverseRow(src_v, dst_v, halfwidth);
-    src_v += src_stride_v;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_HALFROW_SSE2
 __declspec(naked)
diff --git a/source/rotate.cc b/source/rotate.cc
index b44fab80a..8f54ae1e1 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -24,7 +24,7 @@ extern "C" {
     !defined(YUV_DISABLE_ASM)
 // Note static const preferred, but gives internal compiler error on gcc 4.2
 // Shuffle table for reversing the bytes of UV channels.
-uvec8 kShuffleReverseUV = {
+uvec8 kShuffleMirrorUV = {
   14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
 };
@@ -47,7 +47,7 @@ uvec8 kShuffleReverseUV = {
 #endif
 #endif
 
-typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int);
+typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int);
 typedef void (*rotate_uv_wx8_func)(const uint8*, int,
                                    uint8*, int,
                                    uint8*, int, int);
@@ -58,10 +58,10 @@ typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int);
 typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int);
 
 #ifdef __ARM_NEON__
-#define HAS_REVERSE_ROW_NEON
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_REVERSE_ROW_UV_NEON
-void ReverseRowUV_NEON(const uint8* src,
+#define HAS_MIRRORROW_NEON
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+#define HAS_MIRRORROW_UV_NEON
+void MirrorRowUV_NEON(const uint8* src,
                        uint8* dst_a, uint8* dst_b,
                        int width);
 #define HAS_TRANSPOSE_WX8_NEON
@@ -852,37 +852,37 @@ void RotatePlane270(const uint8* src, int src_stride,
 void RotatePlane180(const uint8* src, int src_stride,
                     uint8* dst, int dst_stride,
                     int width, int height) {
-  void (*ReverseRow)(const uint8* src, uint8* dst, int width);
-#if defined(HAS_REVERSE_ROW_NEON)
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width);
+#if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRow_NEON;
+    MirrorRow = MirrorRow_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSSE3)
+#if defined(HAS_MIRRORROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSSE3;
+    MirrorRow = MirrorRow_SSSE3;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_SSE2)
+#if defined(HAS_MIRRORROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ReverseRow = ReverseRow_SSE2;
+    MirrorRow = MirrorRow_SSE2;
   } else
 #endif
   {
-    ReverseRow = ReverseRow_C;
+    MirrorRow = MirrorRow_C;
   }
 
   // Rotate by 180 is a mirror and vertical flip
   src += src_stride * (height - 1);
 
   for (int y = 0; y < height; ++y) {
-    ReverseRow(src, dst, width);
+    MirrorRow(src, dst, width);
     src -= src_stride;
     dst += dst_stride;
   }
@@ -1004,9 +1004,9 @@ void RotateUV270(const uint8* src, int src_stride,
 }
 
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
+#define HAS_MIRRORROW_UV_SSSE3
 __declspec(naked)
-void ReverseRowUV_SSSE3(const uint8* src,
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
 __asm {
@@ -1015,7 +1015,7 @@ __asm {
    mov       edx, [esp + 4 + 8]    // dst_a
    mov       edi, [esp + 4 + 12]   // dst_b
    mov       ecx, [esp + 4 + 16]   // width
-    movdqa    xmm5, kShuffleReverseUV
+    movdqa    xmm5, kShuffleMirrorUV
    lea       eax, [eax + ecx * 2 - 16]
 
  convertloop:
@@ -1035,8 +1035,8 @@ __asm {
 
 #elif (defined(__i386__) || defined(__x86_64__)) && \
     !defined(YUV_DISABLE_ASM)
-#define HAS_REVERSE_ROW_UV_SSSE3
-void ReverseRowUV_SSSE3(const uint8* src,
+#define HAS_MIRRORROW_UV_SSSE3
+void MirrorRowUV_SSSE3(const uint8* src,
                         uint8* dst_a, uint8* dst_b,
                         int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
@@ -1057,7 +1057,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
    "+r"(dst_a),       // %1
    "+r"(dst_b),       // %2
    "+r"(temp_width)   // %3
-  : "m"(kShuffleReverseUV)  // %4
+  : "m"(kShuffleMirrorUV)   // %4
  : "memory", "cc"
 #if defined(__SSE2__)
    , "xmm0", "xmm5"
@@ -1066,7 +1066,7 @@ void ReverseRowUV_SSSE3(const uint8* src,
 }
 #endif
 
-static void ReverseRowUV_C(const uint8* src,
+static void MirrorRowUV_C(const uint8* src,
                            uint8* dst_a, uint8* dst_b,
                            int width) {
   int i;
@@ -1083,29 +1083,29 @@ void RotateUV180(const uint8* src, int src_stride,
                  uint8* dst_b, int dst_stride_b,
                  int width, int height) {
   int i;
-  reverse_uv_func ReverseRow;
+  mirror_uv_func MirrorRow;
 
-#if defined(HAS_REVERSE_ROW_UV_NEON)
+#if defined(HAS_MIRRORROW_UV_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ReverseRow = ReverseRowUV_NEON;
+    MirrorRow = MirrorRowUV_NEON;
   } else
 #endif
-#if defined(HAS_REVERSE_ROW_UV_SSSE3)
+#if defined(HAS_MIRRORROW_UV_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    ReverseRow = ReverseRowUV_SSSE3;
+    MirrorRow = MirrorRowUV_SSSE3;
   } else
 #endif
   {
-    ReverseRow = ReverseRowUV_C;
+    MirrorRow = MirrorRowUV_C;
   }
 
   dst_a += dst_stride_a * (height - 1);
   dst_b += dst_stride_b * (height - 1);
 
   for (i = 0; i < height; ++i) {
-    ReverseRow(src, dst_a, dst_b, width);
+    MirrorRow(src, dst_a, dst_b, width);
     src += src_stride;      // down one line at a time
     dst_a -= dst_stride_a;  // nominally up one line at a time
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index c40e1c33e..449476364 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -19,7 +19,7 @@ extern "C" {
 
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 
-void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
   asm volatile (
     // compute where to start writing destination
     "add         %1, %2                        \n"
@@ -38,7 +38,7 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
     "beq         2f                            \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub         %1, #16                       \n"
 
     // the loop needs to run on blocks of 16. what will be left
@@ -50,12 +50,12 @@ void ReverseRow_NEON(const uint8* src, uint8* dst, int width) {
     "1:                                        \n"
       "vld1.8      {q0}, [%0]!                 \n"  // src += 16
 
-      // reverse the bytes in the 64 bit segments. unable to reverse
+      // mirror the bytes in the 64 bit segments. unable to mirror
       // the bytes in the entire 128 bits in one go.
       "vrev64.8    q0, q0                      \n"
 
-      // because of the inability to reverse the entire 128 bits
-      // reverse the writing out of the two 64 bit segments.
+      // because of the inability to mirror the entire 128 bits
+      // mirror the writing out of the two 64 bit segments.
       "vst1.8      {d1}, [%1]!                 \n"
      "vst1.8      {d0}, [%1], r3              \n"  // dst -= 16
 
@@ -272,7 +272,7 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
   );
 }
 
-void ReverseRowUV_NEON(const uint8* src,
+void MirrorRowUV_NEON(const uint8* src,
                        uint8* dst_a, uint8* dst_b,
                        int width) {
   asm volatile (
@@ -291,7 +291,7 @@ void ReverseRowUV_NEON(const uint8* src,
     "mov         r12, #-8                      \n"
 
     // back of destination by the size of the register that is
-    // going to be reversed
+    // going to be mirrored
     "sub         %1, #8                        \n"
     "sub         %2, #8                        \n"
 
@@ -304,7 +304,7 @@ void ReverseRowUV_NEON(const uint8* src,
     "1:                                        \n"
       "vld2.8      {d0, d1}, [%0]!             \n"  // src += 16
\n" // src += 16 - // reverse the bytes in the 64 bit segments + // mirror the bytes in the 64 bit segments "vrev64.8 q0, q0 \n" "vst1.8 {d0}, [%1], r12 \n" // dst_a -= 8 diff --git a/source/row.h b/source/row.h index ea0e55e6b..723c59388 100644 --- a/source/row.h +++ b/source/row.h @@ -39,8 +39,8 @@ #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3 -#define HAS_REVERSE_ROW_SSSE3 -#define HAS_REVERSE_ROW_SSE2 +#define HAS_MIRRORROW_SSSE3 +#define HAS_MIRRORROW_SSE2 #endif // The following are available on Windows platforms @@ -58,7 +58,7 @@ // The following are available on Neon platforms #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) -#define HAS_REVERSE_ROW_NEON +#define HAS_MIRRORROW_NEON #define HAS_FASTCONVERTYUVTOARGBROW_NEON #define HAS_FASTCONVERTYUVTOBGRAROW_NEON #define HAS_FASTCONVERTYUVTOABGRROW_NEON @@ -107,10 +107,10 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width); -void ReverseRow_SSE2(const uint8* src, uint8* dst, int width); -void ReverseRow_NEON(const uint8* src, uint8* dst, int width); -void ReverseRow_C(const uint8* src, uint8* dst, int width); +void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); +void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); +void MirrorRow_NEON(const uint8* src, uint8* dst, int width); +void MirrorRow_C(const uint8* src, uint8* dst, int width); void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); diff --git a/source/row_common.cc b/source/row_common.cc index cb1e8b0d7..e35213732 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -360,7 +360,7 @@ void FastConvertYToARGBRow_C(const uint8* y_buf, } } -void ReverseRow_C(const uint8* src, uint8* dst, int width) { +void MirrorRow_C(const uint8* src, uint8* dst, int width) { src += width - 1; for (int i = 0; i < width; ++i) { dst[i] = src[0]; diff --git a/source/row_posix.cc b/source/row_posix.cc index 984281be3..a79277268 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -644,14 +644,14 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, } #endif -#ifdef HAS_REVERSE_ROW_SSSE3 +#ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. 
-CONST uvec8 kShuffleReverse = {
+CONST uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "movdqa     %3,%%xmm5                        \n"
@@ -666,7 +666,7 @@ void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
   : "+r"(src),         // %0
    "+r"(dst),         // %1
    "+r"(temp_width)   // %2
-  : "m"(kShuffleReverse)  // %3
+  : "m"(kShuffleMirror)   // %3
  : "memory", "cc"
 #if defined(__SSE2__)
    , "xmm0", "xmm5"
@@ -675,15 +675,15 @@
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
 
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
   intptr_t temp_width = static_cast<intptr_t>(width);
   asm volatile (
   "lea        -0x10(%0),%0                     \n"
 "1:                                            \n"
-  "movdqa     (%0,%2),%%xmm0                   \n"
-  "movdqa     %%xmm0,%%xmm1                    \n"
+  "movdqu     (%0,%2),%%xmm0                   \n"
+  "movdqu     %%xmm0,%%xmm1                    \n"
   "psllw      $0x8,%%xmm0                      \n"
   "psrlw      $0x8,%%xmm1                      \n"
   "por        %%xmm1,%%xmm0                    \n"
@@ -691,7 +691,7 @@ void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
   "pshufhw    $0x1b,%%xmm0,%%xmm0              \n"
   "pshufd     $0x4e,%%xmm0,%%xmm0              \n"
   "sub        $0x10,%2                         \n"
-  "movdqa     %%xmm0,(%1)                      \n"
+  "movdqu     %%xmm0,(%1)                      \n"
   "lea        0x10(%1),%1                      \n"
   "ja         1b                               \n"
   : "+r"(src),   // %0
diff --git a/source/row_win.cc b/source/row_win.cc
index 71d081636..805811897 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -1169,20 +1169,20 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
 #endif
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSSE3
+#ifdef HAS_MIRRORROW_SSSE3
 
 // Shuffle table for reversing the bytes.
-static const uvec8 kShuffleReverse = {
+static const uvec8 kShuffleMirror = {
   15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
 };
 
 __declspec(naked)
-void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 __asm {
    mov       eax, [esp + 4]   // src
    mov       edx, [esp + 8]   // dst
    mov       ecx, [esp + 12]  // width
-    movdqa    xmm5, kShuffleReverse
+    movdqa    xmm5, kShuffleMirror
    lea       eax, [eax - 16]
 convertloop:
    movdqa    xmm0, [eax + ecx]
@@ -1196,18 +1196,20 @@ __asm {
 }
 #endif
 
-#ifdef HAS_REVERSE_ROW_SSE2
+#ifdef HAS_MIRRORROW_SSE2
+// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3
+// version cannot.
 
 __declspec(naked)
-void ReverseRow_SSE2(const uint8* src, uint8* dst, int width) {
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
 __asm {
    mov       eax, [esp + 4]   // src
    mov       edx, [esp + 8]   // dst
    mov       ecx, [esp + 12]  // width
    lea       eax, [eax - 16]
 convertloop:
-    movdqa    xmm0, [eax + ecx]
-    movdqa    xmm1, xmm0   // swap bytes
+    movdqu    xmm0, [eax + ecx]
+    movdqu    xmm1, xmm0   // swap bytes
    psllw     xmm0, 8
    psrlw     xmm1, 8
    por       xmm0, xmm1
@@ -1215,7 +1217,7 @@ __asm {
    pshufhw   xmm0, xmm0, 0x1b
    pshufd    xmm0, xmm0, 0x4e   // swap qwords
    sub       ecx, 16
-    movdqa    [edx], xmm0
+    movdqu    [edx], xmm0
    lea       edx, [edx + 16]
    ja        convertloop
    ret
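
Usage sketch for the new entry point added in planar_functions.cc above: the snippet mirrors one tightly packed I420 frame horizontally. The I420Mirror signature is taken from this diff; the header path, the libyuv namespace qualification, and the 640x360 frame size are illustrative assumptions, not part of the patch.

// Hypothetical caller of the new I420Mirror(). Strides equal the plane
// widths because the planes are tightly packed; passing a negative height
// would additionally flip the image vertically, per the function's comment.
// uint8 is libyuv's byte typedef (assumed pulled in via the header below).
#include <vector>
#include "libyuv/planar_functions.h"  // assumed location of the declaration

int main() {
  const int width = 640;    // illustrative frame size
  const int height = 360;
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;
  // One contiguous I420 buffer: full-size Y plane, then quarter-size U and V.
  std::vector<uint8> src(width * height + 2 * halfwidth * halfheight, 128);
  std::vector<uint8> dst(src.size());
  uint8* src_y = &src[0];
  uint8* src_u = src_y + width * height;
  uint8* src_v = src_u + halfwidth * halfheight;
  uint8* dst_y = &dst[0];
  uint8* dst_u = dst_y + width * height;
  uint8* dst_v = dst_u + halfwidth * halfheight;
  // Returns 0 on success, -1 on null pointers or bad dimensions.
  return libyuv::I420Mirror(src_y, width, src_u, halfwidth, src_v, halfwidth,
                            dst_y, width, dst_u, halfwidth, dst_v, halfwidth,
                            width, height);
}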