diff --git a/README.chromium b/README.chromium
index 441cf7581..0a637246f 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 707
+Version: 708
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 5cb5d7f2f..eb2624953 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 707
+#define LIBYUV_VERSION 708
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/scale.cc b/source/scale.cc
index f39f1dace..721beee08 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -196,16 +196,14 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                                // src_stride ignored
     mov        edx, [esp + 12]   // dst_ptr
     mov        ecx, [esp + 16]   // dst_width
-    pcmpeqb    xmm5, xmm5        // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
 
     align      16
   wloop:
     movdqa     xmm0, [eax]
     movdqa     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    pand       xmm0, xmm5
-    pand       xmm1, xmm5
+    psrlw      xmm0, 8           // isolate odd pixels.
+    psrlw      xmm1, 8
     packuswb   xmm0, xmm1
     sub        ecx, 16
     movdqa     [edx], xmm0
@@ -271,16 +269,14 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
                                // src_stride ignored
     mov        edx, [esp + 12]   // dst_ptr
     mov        ecx, [esp + 16]   // dst_width
-    pcmpeqb    xmm5, xmm5        // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
 
     align      16
   wloop:
     movdqu     xmm0, [eax]
     movdqu     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    pand       xmm0, xmm5
-    pand       xmm1, xmm5
+    psrlw      xmm0, 8           // isolate odd pixels.
+    psrlw      xmm1, 8
     packuswb   xmm0, xmm1
     sub        ecx, 16
     movdqu     [edx], xmm0
@@ -1269,15 +1265,13 @@ static void ScaleFilterRows_Unaligned_SSSE3(uint8* dst_ptr,
 static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                                uint8* dst_ptr, int dst_width) {
   asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
     "packuswb  %%xmm1,%%xmm0                   \n"
     "movdqa    %%xmm0,(%1)                     \n"
     "lea       0x10(%1),%1                     \n"
@@ -1289,7 +1283,7 @@ static void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
   :
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
+    , "xmm0", "xmm1"
 #endif
   );
 }
@@ -1336,15 +1330,13 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
                                          ptrdiff_t src_stride,
                                          uint8* dst_ptr, int dst_width) {
   asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqu    (%0),%%xmm0                     \n"
     "movdqu    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
     "packuswb  %%xmm1,%%xmm0                   \n"
     "movdqu    %%xmm0,(%1)                     \n"
     "lea       0x10(%1),%1                     \n"
@@ -1356,7 +1348,7 @@ static void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
   :
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
+    , "xmm0", "xmm1"
 #endif
   );
 }
@@ -2324,13 +2316,13 @@ static void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
                             uint8* dst, int dst_width) {
   uint8* dend = dst + dst_width - 1;
   do {
-    dst[0] = src_ptr[0];
-    dst[1] = src_ptr[2];
+    dst[0] = src_ptr[1];
+    dst[1] = src_ptr[3];
     dst += 2;
     src_ptr += 4;
   } while (dst < dend);
   if (dst_width & 1) {
-    dst[0] = src_ptr[0];
+    dst[0] = src_ptr[1];
   }
 }
@@ -2689,6 +2681,7 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
   }
 #endif
 
+  src_ptr += src_stride;  // Point to odd rows.
   // TODO(fbarchard): Loop through source height to allow odd height.
   for (int y = 0; y < dst_height; ++y) {
     ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
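
Context for the scale.cc hunks above (not part of the patch itself): the old code built a 0x00ff00ff mask in xmm5 and masked off the high bytes to keep the even pixels; the new code instead shifts each 16-bit lane right by 8, which discards the even byte and leaves the odd pixel in the low byte, freeing xmm5. A minimal SSE2-intrinsics sketch of the same sequence, assuming dst_width is a multiple of 16 (the helper name is hypothetical, not from libyuv):

    #include <emmintrin.h>  // SSE2

    // Point-sample a row down by 2, keeping the odd pixel of each pair,
    // via the same psrlw + packuswb sequence as the patched assembly.
    static void HalveRowOdd_SSE2(const unsigned char* src_ptr,
                                 unsigned char* dst_ptr, int dst_width) {
      for (int x = 0; x < dst_width; x += 16) {
        __m128i a = _mm_loadu_si128((const __m128i*)(src_ptr + 2 * x));
        __m128i b = _mm_loadu_si128((const __m128i*)(src_ptr + 2 * x + 16));
        a = _mm_srli_epi16(a, 8);  // each 16-bit lane: keep the odd byte
        b = _mm_srli_epi16(b, 8);
        _mm_storeu_si128((__m128i*)(dst_ptr + x),
                         _mm_packus_epi16(a, b));  // pack 2x8 words to 16 bytes
      }
    }

The C fallback agrees with this: src_ptr[1] and src_ptr[3] are the odd pixels of each pair, and ScalePlaneDown2 now starts on the odd row, so the point-sampled path samples from the same 2x2 neighborhood that the averaging (Int) path reduces.
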
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 248236c24..fa271556a 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -62,7 +62,7 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
     movdqa     xmm0, [eax]
     movdqa     xmm1, [eax + 16]
     lea        eax,  [eax + 32]
-    shufps     xmm0, xmm1, 0x88
+    shufps     xmm0, xmm1, 0xdd
     sub        ecx, 4
     movdqa     [edx], xmm0
     lea        edx, [edx + 16]
@@ -350,7 +350,7 @@ static void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
     "movdqa    (%0),%%xmm0                     \n"
     "movdqa    0x10(%0),%%xmm1                 \n"
     "lea       0x20(%0),%0                     \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm0             \n"
     "sub       $0x4,%2                         \n"
     "movdqa    %%xmm0,(%1)                     \n"
     "lea       0x10(%1),%1                     \n"
@@ -634,13 +634,13 @@ static void ScaleARGBRowDown2_C(const uint8* src_argb,
   uint32* dst = reinterpret_cast<uint32*>(dst_argb);
 
   for (int x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = src[0];
-    dst[1] = src[2];
+    dst[0] = src[1];
+    dst[1] = src[3];
     src += 4;
     dst += 2;
   }
   if (dst_width & 1) {
-    dst[0] = src[0];
+    dst[0] = src[1];
   }
 }
@@ -743,25 +743,26 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
                            FilterMode filtering) {
   assert(dx == 65536 * 2);  // Test scale factor of 2.
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
+  // Advance to odd row / even column.
+  src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
+  int row_stride = src_stride * (dy >> 16);
   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
                             uint8* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C;
 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
     ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 :
         ScaleARGBRowDown2_SSE2;
   }
 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
-      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
     ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_NEON :
         ScaleARGBRowDown2_NEON;
   }
 #endif
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-  int row_stride = src_stride * (dy >> 16);
 
   // TODO(fbarchard): Loop through source height to allow odd height.
   for (int y = 0; y < dst_height; ++y) {
@@ -782,6 +783,9 @@ static void ScaleARGBDownEven(int src_width, int src_height,
                               FilterMode filtering) {
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
+  int col_step = dx >> 16;
+  int row_stride = (dy >> 16) * src_stride;
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
   void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
                                int src_step, uint8* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDownEvenInt_C : ScaleARGBRowDownEven_C;
@@ -798,9 +802,6 @@ static void ScaleARGBDownEven(int src_width, int src_height,
         ScaleARGBRowDownEven_NEON;
   }
 #endif
-  int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * src_stride;
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
 
   for (int y = 0; y < dst_height; ++y) {
     ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
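
Context for the scale_argb.cc hunks above (not part of the patch itself): with 4-byte ARGB pixels, shufps selects 32-bit elements; the old immediate 0x88 (0b10'00'10'00) picked elements 0 and 2 of each register (even pixels), while 0xdd (0b11'01'11'01) picks elements 1 and 3 (odd pixels). A scalar sketch of what one row function now computes (the helper name is hypothetical, not from libyuv):

    #include <stdint.h>

    // Point-sample an ARGB row down by 2, keeping the odd pixel of each
    // pair, as the patched shufps $0xdd / ScaleARGBRowDown2_C now do.
    static void HalveArgbRowOdd(const uint32_t* src, uint32_t* dst,
                                int dst_width) {
      for (int x = 0; x < dst_width; ++x) {
        dst[x] = src[2 * x + 1];  // odd pixel of each 2-pixel pair
      }
    }

The pointer setup in ScaleARGBDown2 and ScaleARGBDownEven is hoisted above the SIMD dispatch so the alignment tests check the adjusted src_argb and the per-row stride (row_stride) that the loop actually uses, rather than the unadjusted values.
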
diff --git a/source/scale_argb_neon.cc b/source/scale_argb_neon.cc
index 720b72e22..819186bc7 100644
--- a/source/scale_argb_neon.cc
+++ b/source/scale_argb_neon.cc
@@ -27,8 +27,8 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
     "vld2.u32   {q0, q1}, [%0]!                \n"
     "vld2.u32   {q2, q3}, [%0]!                \n"
     "subs       %2, %2, #8                     \n"  // 8 processed per loop
-    "vst1.u8    {q0}, [%1]!                    \n"  // store even pixels
-    "vst1.u8    {q2}, [%1]!                    \n"
+    "vst1.u8    {q1}, [%1]!                    \n"  // store odd pixels
+    "vst1.u8    {q3}, [%1]!                    \n"
     "bgt        1b                             \n"
   : "+r"(src_ptr),          // %0
     "+r"(dst),              // %1
@@ -78,6 +78,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t,
                                int src_stepx,
                                uint8* dst_argb, int dst_width) {
   asm volatile (
+    "add        %0, #4                         \n"  // point to odd pixels.
     "mov        r12, %3, lsl #2                \n"
     ".p2align   2                              \n"
   "1:                                          \n"
diff --git a/source/scale_mips.cc b/source/scale_mips.cc
index ba8e8b516..b30eaba0c 100644
--- a/source/scale_mips.cc
+++ b/source/scale_mips.cc
@@ -39,6 +39,7 @@ void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
     "lw         $t5, 20(%[src_ptr])            \n"  // |23|22|21|20|
     "lw         $t6, 24(%[src_ptr])            \n"  // |27|26|25|24|
     "lw         $t7, 28(%[src_ptr])            \n"  // |31|30|29|28|
+    // TODO(fbarchard): Use odd pixels instead of even.
     "precr.qb.ph $t8, $t1, $t0                 \n"  // |6|4|2|0|
     "precr.qb.ph $t0, $t3, $t2                 \n"  // |14|12|10|8|
     "precr.qb.ph $t1, $t5, $t4                 \n"  // |22|20|18|16|
diff --git a/source/scale_neon.cc b/source/scale_neon.cc
index c1cf7f11b..2449ec80e 100644
--- a/source/scale_neon.cc
+++ b/source/scale_neon.cc
@@ -29,7 +29,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
     // load even pixels into q0, odd into q1
     "vld2.u8    {q0,q1}, [%0]!                 \n"
     "subs       %2, %2, #16                    \n"  // 16 processed per loop
-    "vst1.u8    {q0}, [%1]!                    \n"  // store even pixels
+    "vst1.u8    {q1}, [%1]!                    \n"  // store odd pixels
     "bgt        1b                             \n"
   : "+r"(src_ptr),          // %0
     "+r"(dst),              // %1
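
Context for the NEON and MIPS hunks above (not part of the patch itself): vld2 deinterleaves as it loads, placing even elements in the first register (q0/q2) and odd elements in the second (q1/q3), so switching the store to q1/q3 selects the odd pixels at no extra cost; ScaleARGBRowDownEven_NEON instead biases the base pointer by one ARGB pixel (add %0, #4). The MIPS DSPR2 path still packs even pixels, hence the TODO. A minimal NEON-intrinsics sketch of the deinterleaving trick, assuming dst_width is a multiple of 16 (the helper name is hypothetical, not from libyuv):

    #include <arm_neon.h>

    // Point-sample a row down by 2 by loading deinterleaved byte pairs
    // and storing only the odd-indexed bytes, as the patched NEON asm does.
    static void HalveRowOdd_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr,
                                 int dst_width) {
      for (int x = 0; x < dst_width; x += 16) {
        uint8x16x2_t v = vld2q_u8(src_ptr + 2 * x);  // val[0]=even, val[1]=odd
        vst1q_u8(dst_ptr + x, v.val[1]);             // keep odd pixels
      }
    }
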