diff --git a/README.chromium b/README.chromium
index 00e918ffe..9f70d6007 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 784
+Version: 785
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 5227e9dfa..6ad24ae15 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 784
+#define LIBYUV_VERSION 785
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index b6cacb683..a567c7d21 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -744,9 +744,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
   void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
       ARGBToRGB24Row_C;
 #if defined(HAS_ARGBTORGB24ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
     ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
     if (IS_ALIGNED(width, 16)) {
       ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
@@ -792,9 +790,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
   void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
       ARGBToRAWRow_C;
 #if defined(HAS_ARGBTORAWROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
     ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
     if (IS_ALIGNED(width, 16)) {
       ARGBToRAWRow = ARGBToRAWRow_SSSE3;
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 9e8b1a5b4..6e8722f6a 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -569,10 +569,10 @@ void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
     "movdqa    %3,%%xmm6                       \n"
     ".p2align  4                               \n"
   "1:                                          \n"
-    "movdqa    (%0),%%xmm0                     \n"
-    "movdqa    0x10(%0),%%xmm1                 \n"
-    "movdqa    0x20(%0),%%xmm2                 \n"
-    "movdqa    0x30(%0),%%xmm3                 \n"
+    "movdqu    (%0),%%xmm0                     \n"
+    "movdqu    0x10(%0),%%xmm1                 \n"
+    "movdqu    0x20(%0),%%xmm2                 \n"
+    "movdqu    0x30(%0),%%xmm3                 \n"
     "lea       0x40(%0),%0                     \n"
     "pshufb    %%xmm6,%%xmm0                   \n"
     "pshufb    %%xmm6,%%xmm1                   \n"
@@ -584,13 +584,13 @@ void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
     "movdqa    %%xmm2,%%xmm5                   \n"
     "por       %%xmm4,%%xmm0                   \n"
     "pslldq    $0x8,%%xmm5                     \n"
-    "movdqa    %%xmm0,(%1)                     \n"
+    "movdqu    %%xmm0,(%1)                     \n"
     "por       %%xmm5,%%xmm1                   \n"
     "psrldq    $0x8,%%xmm2                     \n"
     "pslldq    $0x4,%%xmm3                     \n"
     "por       %%xmm3,%%xmm2                   \n"
-    "movdqa    %%xmm1,0x10(%1)                 \n"
-    "movdqa    %%xmm2,0x20(%1)                 \n"
+    "movdqu    %%xmm1,0x10(%1)                 \n"
+    "movdqu    %%xmm2,0x20(%1)                 \n"
     "lea       0x30(%1),%1                     \n"
     "sub       $0x10,%2                        \n"
     "jg        1b                              \n"
@@ -610,10 +610,10 @@ void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
     "movdqa    %3,%%xmm6                       \n"
     ".p2align  4                               \n"
   "1:                                          \n"
-    "movdqa    (%0),%%xmm0                     \n"
-    "movdqa    0x10(%0),%%xmm1                 \n"
-    "movdqa    0x20(%0),%%xmm2                 \n"
-    "movdqa    0x30(%0),%%xmm3                 \n"
+    "movdqu    (%0),%%xmm0                     \n"
+    "movdqu    0x10(%0),%%xmm1                 \n"
+    "movdqu    0x20(%0),%%xmm2                 \n"
+    "movdqu    0x30(%0),%%xmm3                 \n"
     "lea       0x40(%0),%0                     \n"
     "pshufb    %%xmm6,%%xmm0                   \n"
     "pshufb    %%xmm6,%%xmm1                   \n"
@@ -625,13 +625,13 @@ void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
     "movdqa    %%xmm2,%%xmm5                   \n"
     "por       %%xmm4,%%xmm0                   \n"
     "pslldq    $0x8,%%xmm5                     \n"
-    "movdqa    %%xmm0,(%1)                     \n"
+    "movdqu    %%xmm0,(%1)                     \n"
     "por       %%xmm5,%%xmm1                   \n"
     "psrldq    $0x8,%%xmm2                     \n"
     "pslldq    $0x4,%%xmm3                     \n"
     "por       %%xmm3,%%xmm2                   \n"
-    "movdqa    %%xmm1,0x10(%1)                 \n"
-    "movdqa    %%xmm2,0x20(%1)                 \n"
+    "movdqu    %%xmm1,0x10(%1)                 \n"
+    "movdqu    %%xmm2,0x20(%1)                 \n"
     "lea       0x30(%1),%1                     \n"
     "sub       $0x10,%2                        \n"
     "jg        1b                              \n"
diff --git a/source/row_win.cc b/source/row_win.cc
index 3e1aabd1b..1bf4a94ed 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -479,10 +479,10 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
 
     align      16
  convertloop:
-    movdqa    xmm0, [eax]   // fetch 16 pixels of argb
-    movdqa    xmm1, [eax + 16]
-    movdqa    xmm2, [eax + 32]
-    movdqa    xmm3, [eax + 48]
+    movdqu    xmm0, [eax]   // fetch 16 pixels of argb
+    movdqu    xmm1, [eax + 16]
+    movdqu    xmm2, [eax + 32]
+    movdqu    xmm3, [eax + 48]
     lea       eax, [eax + 64]
     pshufb    xmm0, xmm6    // pack 16 bytes of ARGB to 12 bytes of RGB
     pshufb    xmm1, xmm6
@@ -494,13 +494,13 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
     movdqa    xmm5, xmm2   // 8 bytes from 2 for 1
     por       xmm0, xmm4   // 4 bytes from 1 for 0
     pslldq    xmm5, 8      // 8 bytes from 2 for 1
-    movdqa    [edx], xmm0  // store 0
+    movdqu    [edx], xmm0  // store 0
     por       xmm1, xmm5   // 8 bytes from 2 for 1
     psrldq    xmm2, 8      // 4 bytes from 2
     pslldq    xmm3, 4      // 12 bytes from 3 for 2
     por       xmm2, xmm3   // 12 bytes from 3 for 2
-    movdqa    [edx + 16], xmm1   // store 1
-    movdqa    [edx + 32], xmm2   // store 2
+    movdqu    [edx + 16], xmm1   // store 1
+    movdqu    [edx + 32], xmm2   // store 2
     lea       edx, [edx + 48]
     sub       ecx, 16
     jg        convertloop
@@ -518,10 +518,10 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
 
     align      16
  convertloop:
-    movdqa    xmm0, [eax]   // fetch 16 pixels of argb
-    movdqa    xmm1, [eax + 16]
-    movdqa    xmm2, [eax + 32]
-    movdqa    xmm3, [eax + 48]
+    movdqu    xmm0, [eax]   // fetch 16 pixels of argb
+    movdqu    xmm1, [eax + 16]
+    movdqu    xmm2, [eax + 32]
+    movdqu    xmm3, [eax + 48]
     lea       eax, [eax + 64]
     pshufb    xmm0, xmm6    // pack 16 bytes of ARGB to 12 bytes of RGB
     pshufb    xmm1, xmm6
@@ -533,13 +533,13 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
     movdqa    xmm5, xmm2   // 8 bytes from 2 for 1
     por       xmm0, xmm4   // 4 bytes from 1 for 0
     pslldq    xmm5, 8      // 8 bytes from 2 for 1
-    movdqa    [edx], xmm0  // store 0
+    movdqu    [edx], xmm0  // store 0
     por       xmm1, xmm5   // 8 bytes from 2 for 1
     psrldq    xmm2, 8      // 4 bytes from 2
     pslldq    xmm3, 4      // 12 bytes from 3 for 2
     por       xmm2, xmm3   // 12 bytes from 3 for 2
-    movdqa    [edx + 16], xmm1   // store 1
-    movdqa    [edx + 32], xmm2   // store 2
+    movdqu    [edx + 16], xmm1   // store 1
+    movdqu    [edx + 32], xmm2   // store 2
     lea       edx, [edx + 48]
     sub       ecx, 16
     jg        convertloop