diff --git a/README.chromium b/README.chromium index 41b410ae2..c3d58b987 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 884 +Version: 885 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index be3bba444..f0343a77d 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -1,168 +1,168 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy ARGB to ARGB. -#define ARGBToARGB ARGBCopy -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To BGRA. (alias) -#define ARGBToBGRA BGRAToARGB -LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To ABGR. (alias) -#define ARGBToABGR ABGRToARGB -LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGBA. -LIBYUV_API -int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGB24. -LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); - -// Convert ARGB To RAW. -LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height); - -// Convert ARGB To RGB565. -LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// Convert ARGB To ARGB1555. -LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height); - -// Convert ARGB To ARGB4444. -LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height); - -// Convert ARGB To I444. -LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I422. -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I420. (also in convert.h) -LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J420. (JPeg full range I420). 
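
Aside on usage: the planar converters above all share the same shape, so one minimal caller sketch for ARGBToI420 covers them. This sketch is illustrative and not part of the patch; the buffer names and the return-zero-on-success check are assumptions about the caller, not this header.

    // Minimal sketch, assuming libyuv's convention that converters return 0
    // on success. I420 is one full-size Y plane plus quarter-size U/V planes.
    #include <vector>
    #include "libyuv/convert_from_argb.h"

    bool ArgbFrameToI420(const uint8* argb, int width, int height) {
      int half_width = (width + 1) / 2;
      int half_height = (height + 1) / 2;
      std::vector<uint8> y(width * height);
      std::vector<uint8> u(half_width * half_height);
      std::vector<uint8> v(half_width * half_height);
      // Strides are in bytes: 4 per ARGB pixel, 1 per sample in each plane.
      return libyuv::ARGBToI420(argb, width * 4,
                                &y[0], width,
                                &u[0], half_width,
                                &v[0], half_width,
                                width, height) == 0;
    }
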
-LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I411. -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J400. (JPeg full range). -LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height); - -// Convert ARGB to I400. -LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Convert ARGB To NV12. -LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To YUY2. -LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height); - -// Convert ARGB To UYVY. -LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT +#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ + +#include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// Copy ARGB to ARGB. +#define ARGBToARGB ARGBCopy +LIBYUV_API +int ARGBCopy(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To BGRA. (alias) +#define ARGBToBGRA BGRAToARGB +LIBYUV_API +int BGRAToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To ABGR. (alias) +#define ARGBToABGR ABGRToARGB +LIBYUV_API +int ABGRToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To RGBA. +LIBYUV_API +int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To RGB24. +LIBYUV_API +int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb24, int dst_stride_rgb24, + int width, int height); + +// Convert ARGB To RAW. 
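
A related convention, exercised later in this patch by the _Invert test variants (which pass NEG kHeight): a negative height flips the image vertically. A sketch, with illustrative names, using the ARGBToRGB24 declaration above:

    // Sketch: convert ARGB to RGB24 while flipping vertically by passing a
    // negative height. Function name and buffers are illustrative.
    #include "libyuv/convert_from_argb.h"

    void ArgbToRgb24Flipped(const uint8* argb, uint8* rgb24,
                            int width, int height) {
      libyuv::ARGBToRGB24(argb, width * 4,    // 4 bytes per ARGB pixel.
                          rgb24, width * 3,   // 3 bytes per RGB24 pixel.
                          width, -height);    // Negative height flips.
    }
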
+LIBYUV_API +int ARGBToRAW(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb, int dst_stride_rgb, + int width, int height); + +// Convert ARGB To RGB565. +LIBYUV_API +int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb565, int dst_stride_rgb565, + int width, int height); + +// Convert ARGB To ARGB1555. +LIBYUV_API +int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb1555, int dst_stride_argb1555, + int width, int height); + +// Convert ARGB To ARGB4444. +LIBYUV_API +int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb4444, int dst_stride_argb4444, + int width, int height); + +// Convert ARGB To I444. +LIBYUV_API +int ARGBToI444(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I422. +LIBYUV_API +int ARGBToI422(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I420. (also in convert.h) +LIBYUV_API +int ARGBToI420(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB to J420. (JPeg full range I420). +LIBYUV_API +int ARGBToJ420(const uint8* src_argb, int src_stride_argb, + uint8* dst_yj, int dst_stride_yj, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I411. +LIBYUV_API +int ARGBToI411(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB to J400. (JPeg full range). +LIBYUV_API +int ARGBToJ400(const uint8* src_argb, int src_stride_argb, + uint8* dst_yj, int dst_stride_yj, + int width, int height); + +// Convert ARGB to I400. +LIBYUV_API +int ARGBToI400(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + int width, int height); + +// Convert ARGB To NV12. +LIBYUV_API +int ARGBToNV12(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_uv, int dst_stride_uv, + int width, int height); + +// Convert ARGB To NV21. +LIBYUV_API +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, + int width, int height); + +// Convert ARGB To NV21. +LIBYUV_API +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, + int width, int height); + +// Convert ARGB To YUY2. +LIBYUV_API +int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, + uint8* dst_yuy2, int dst_stride_yuy2, + int width, int height); + +// Convert ARGB To UYVY. 
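
YUY2 and UYVY are packed 4:2:2 formats at 2 bytes per pixel, so a single destination buffer of width * 2 bytes per row suffices. A sizing sketch (illustrative, not part of the patch; assumes an even width so U/V pairs are not split):

    #include <vector>
    #include "libyuv/convert_from_argb.h"

    std::vector<uint8> ArgbToUyvy(const uint8* argb, int width, int height) {
      // Packed 4:2:2: each 2-pixel group is U0 Y0 V0 Y1, i.e. 2 bytes/pixel.
      std::vector<uint8> uyvy(width * 2 * height);
      libyuv::ARGBToUYVY(argb, width * 4, &uyvy[0], width * 2, width, height);
      return uyvy;
    }
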
+LIBYUV_API
+int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_uyvy, int dst_stride_uyvy,
+               int width, int height);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  NOLINT
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index ce3a723e2..f14bdbf6a 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 884
+#define LIBYUV_VERSION 885
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/compare_common.cc b/source/compare_common.cc
index ab587d081..3e4c77a67 100644
--- a/source/compare_common.cc
+++ b/source/compare_common.cc
@@ -1,40 +1,40 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
-  uint32 sse = 0u;
-  for (int i = 0; i < count; ++i) {
-    int diff = src_a[i] - src_b[i];
-    sse += static_cast<uint32>(diff * diff);
-  }
-  return sse;
-}
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
-  uint32 hash = seed;
-  for (int i = 0; i < count; ++i) {
-    hash += (hash << 5) + src[i];
-  }
-  return hash;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
+  uint32 sse = 0u;
+  for (int i = 0; i < count; ++i) {
+    int diff = src_a[i] - src_b[i];
+    sse += static_cast<uint32>(diff * diff);
+  }
+  return sse;
+}
+
+// hash seed of 5381 recommended.
+// Internal C version of HashDjb2 with int sized count for efficiency.
+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
+  uint32 hash = seed;
+  for (int i = 0; i < count; ++i) {
+    hash += (hash << 5) + src[i];
+  }
+  return hash;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/source/compare_neon.cc b/source/compare_neon.cc
index a4e777506..c377c1634 100644
--- a/source/compare_neon.cc
+++ b/source/compare_neon.cc
@@ -1,61 +1,61 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - volatile uint32 sse; - asm volatile ( - "vmov.u8 q8, #0 \n" - "vmov.u8 q10, #0 \n" - "vmov.u8 q9, #0 \n" - "vmov.u8 q11, #0 \n" - - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" - "vld1.8 {q1}, [%1]! \n" - "subs %2, %2, #16 \n" - "vsubl.u8 q2, d0, d2 \n" - "vsubl.u8 q3, d1, d3 \n" - "vmlal.s16 q8, d4, d4 \n" - "vmlal.s16 q9, d6, d6 \n" - "vmlal.s16 q10, d5, d5 \n" - "vmlal.s16 q11, d7, d7 \n" - "bgt 1b \n" - - "vadd.u32 q8, q8, q9 \n" - "vadd.u32 q10, q10, q11 \n" - "vadd.u32 q11, q8, q10 \n" - "vpaddl.u32 q1, q11 \n" - "vadd.u64 d0, d2, d3 \n" - "vmov.32 %3, d0[0] \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); - return sse; -} - -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) + +uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { + volatile uint32 sse; + asm volatile ( + "vmov.u8 q8, #0 \n" + "vmov.u8 q10, #0 \n" + "vmov.u8 q9, #0 \n" + "vmov.u8 q11, #0 \n" + + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" + "vld1.8 {q1}, [%1]! \n" + "subs %2, %2, #16 \n" + "vsubl.u8 q2, d0, d2 \n" + "vsubl.u8 q3, d1, d3 \n" + "vmlal.s16 q8, d4, d4 \n" + "vmlal.s16 q9, d6, d6 \n" + "vmlal.s16 q10, d5, d5 \n" + "vmlal.s16 q11, d7, d7 \n" + "bgt 1b \n" + + "vadd.u32 q8, q8, q9 \n" + "vadd.u32 q10, q10, q11 \n" + "vadd.u32 q11, q8, q10 \n" + "vpaddl.u32 q1, q11 \n" + "vadd.u64 d0, d2, d3 \n" + "vmov.32 %3, d0[0] \n" + : "+r"(src_a), + "+r"(src_b), + "+r"(count), + "=r"(sse) + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); + return sse; +} + +#endif // __ARM_NEON__ + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/source/compare_posix.cc b/source/compare_posix.cc index bb7902dbf..1e0ba8fe1 100644 --- a/source/compare_posix.cc +++ b/source/compare_posix.cc @@ -1,166 +1,166 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) - -#if defined(__native_client__) && defined(__x86_64__) -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#else -#define MEMACCESS(base) "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#endif - -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; - asm volatile ( // NOLINT - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - ".p2align 2 \n" - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "movdqa " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10, 1) ",%1 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "jg 1b \n" - - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" - - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); // NOLINT - return sse; -} - -#endif // defined(__x86_64__) || defined(__i386__) - -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - uint32 hash; - asm volatile ( // NOLINT - "movd %2,%%xmm0 \n" - "pxor %%xmm7,%%xmm7 \n" - "movdqa %4,%%xmm6 \n" - ".p2align 2 \n" - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "pmulld %%xmm6,%%xmm0 \n" - "movdqa %5,%%xmm5 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm7,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm7,%%xmm3 \n" - "pmulld %%xmm5,%%xmm3 \n" - "movdqa %6,%%xmm5 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpckhwd %%xmm7,%%xmm4 \n" - "pmulld %%xmm5,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "punpckhbw %%xmm7,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm7,%%xmm2 \n" - "pmulld %%xmm5,%%xmm2 \n" - "movdqa %8,%%xmm5 \n" - "punpckhwd %%xmm7,%%xmm1 \n" - "pmulld %%xmm5,%%xmm1 \n" - "paddd %%xmm4,%%xmm3 \n" - "paddd %%xmm2,%%xmm1 \n" - "sub $0x10,%1 \n" - "paddd %%xmm3,%%xmm1 \n" - "pshufd $0xe,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "pshufd $0x1,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "jg 1b \n" - "movd %%xmm0,%3 \n" - : "+r"(src), // %0 - "+r"(count), // %1 - "+rm"(seed), // %2 - "=g"(hash) // %3 - : "m"(kHash16x33), // %4 - "m"(kHashMul0), // %5 - 
"m"(kHashMul1), // %6 - "m"(kHashMul2), // %7 - "m"(kHashMul3) // %8 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); // NOLINT - return hash; -} -#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" +#include "libyuv/row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) + +#if defined(__native_client__) && defined(__x86_64__) +#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" +#define MEMLEA(offset, base) #offset "(%q" #base ")" +#else +#define MEMACCESS(base) "(%" #base ")" +#define MEMLEA(offset, base) #offset "(%" #base ")" +#endif + +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { + uint32 sse; + asm volatile ( // NOLINT + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" + ".p2align 2 \n" + "1: \n" + "movdqa " MEMACCESS(0) ",%%xmm1 \n" + "lea " MEMLEA(0x10, 0) ",%0 \n" + "movdqa " MEMACCESS(1) ",%%xmm2 \n" + "lea " MEMLEA(0x10, 1) ",%1 \n" + "sub $0x10,%2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "jg 1b \n" + + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" +#endif + ); // NOLINT + return sse; +} + +#endif // defined(__x86_64__) || defined(__i386__) + +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) +#define HAS_HASHDJB2_SSE41 +static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +static uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 +}; +static uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 +}; +static uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 +}; +static uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 +}; + +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { + uint32 hash; + asm volatile ( // NOLINT + "movd %2,%%xmm0 \n" + "pxor %%xmm7,%%xmm7 \n" + "movdqa %4,%%xmm6 \n" + ".p2align 2 \n" + "1: \n" + "movdqu " MEMACCESS(0) ",%%xmm1 \n" + "lea " MEMLEA(0x10, 0) ",%0 \n" + "pmulld %%xmm6,%%xmm0 \n" + "movdqa %5,%%xmm5 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm7,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm7,%%xmm3 \n" + "pmulld 
%%xmm5,%%xmm3 \n" + "movdqa %6,%%xmm5 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpckhwd %%xmm7,%%xmm4 \n" + "pmulld %%xmm5,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "punpckhbw %%xmm7,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm7,%%xmm2 \n" + "pmulld %%xmm5,%%xmm2 \n" + "movdqa %8,%%xmm5 \n" + "punpckhwd %%xmm7,%%xmm1 \n" + "pmulld %%xmm5,%%xmm1 \n" + "paddd %%xmm4,%%xmm3 \n" + "paddd %%xmm2,%%xmm1 \n" + "sub $0x10,%1 \n" + "paddd %%xmm3,%%xmm1 \n" + "pshufd $0xe,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "pshufd $0x1,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "jg 1b \n" + "movd %%xmm0,%3 \n" + : "+r"(src), // %0 + "+r"(count), // %1 + "+rm"(seed), // %2 + "=g"(hash) // %3 + : "m"(kHash16x33), // %4 + "m"(kHashMul0), // %5 + "m"(kHashMul1), // %6 + "m"(kHashMul2), // %7 + "m"(kHashMul3) // %8 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" +#endif + ); // NOLINT + return hash; +} +#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + diff --git a/source/compare_win.cc b/source/compare_win.cc index ab279777e..99831651f 100644 --- a/source/compare_win.cc +++ b/source/compare_win.cc @@ -1,232 +1,232 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - pxor xmm0, xmm0 - pxor xmm5, xmm5 - - align 4 - wloop: - movdqa xmm1, [eax] - lea eax, [eax + 16] - movdqa xmm2, [edx] - lea edx, [edx + 16] - sub ecx, 16 - movdqa xmm3, xmm1 // abs trick - psubusb xmm1, xmm2 - psubusb xmm2, xmm3 - por xmm1, xmm2 - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm0, xmm1 - paddd xmm0, xmm2 - jg wloop - - pshufd xmm1, xmm0, 0xee - paddd xmm0, xmm1 - pshufd xmm1, xmm0, 0x01 - paddd xmm0, xmm1 - movd eax, xmm0 - ret - } -} - -// Visual C 2012 required for AVX2. -#if _MSC_VER >= 1700 -// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. -#pragma warning(disable: 4752) -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - vpxor ymm0, ymm0, ymm0 // sum - vpxor ymm5, ymm5, ymm5 // constant 0 for unpck - sub edx, eax - - align 4 - wloop: - vmovdqu ymm1, [eax] - vmovdqu ymm2, [eax + edx] - lea eax, [eax + 32] - sub ecx, 32 - vpsubusb ymm3, ymm1, ymm2 // abs difference trick - vpsubusb ymm2, ymm2, ymm1 - vpor ymm1, ymm2, ymm3 - vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. - vpunpckhbw ymm1, ymm1, ymm5 - vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. 
- vpmaddwd ymm1, ymm1, ymm1 - vpaddd ymm0, ymm0, ymm1 - vpaddd ymm0, ymm0, ymm2 - jg wloop - - vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpermq ymm1, ymm0, 0x02 // high + low lane. - vpaddd ymm0, ymm0, ymm1 - vmovd eax, xmm0 - vzeroupper - ret - } -} -#endif // _MSC_VER >= 1700 - -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 -// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 -// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 -// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 -// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 -#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ - _asm _emit 0x40 _asm _emit reg - -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - - pxor xmm7, xmm7 // constant 0 for unpck - movdqa xmm6, kHash16x33 - - align 4 - wloop: - movdqu xmm1, [eax] // src[0-15] - lea eax, [eax + 16] - pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 - movdqa xmm5, kHashMul0 - movdqa xmm2, xmm1 - punpcklbw xmm2, xmm7 // src[0-7] - movdqa xmm3, xmm2 - punpcklwd xmm3, xmm7 // src[0-3] - pmulld(0xdd) // pmulld xmm3, xmm5 - movdqa xmm5, kHashMul1 - movdqa xmm4, xmm2 - punpckhwd xmm4, xmm7 // src[4-7] - pmulld(0xe5) // pmulld xmm4, xmm5 - movdqa xmm5, kHashMul2 - punpckhbw xmm1, xmm7 // src[8-15] - movdqa xmm2, xmm1 - punpcklwd xmm2, xmm7 // src[8-11] - pmulld(0xd5) // pmulld xmm2, xmm5 - movdqa xmm5, kHashMul3 - punpckhwd xmm1, xmm7 // src[12-15] - pmulld(0xcd) // pmulld xmm1, xmm5 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} - -// Visual C 2012 required for AVX2. 
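
A note on the "abs trick" used by SumSquareError_SSE2 and SumSquareError_AVX2 above: there is no packed unsigned byte subtraction that yields signed per-byte differences, so the code saturating-subtracts in both directions and ORs the results, which gives |a - b| per byte (exactly one side is nonzero). A scalar sketch of the same computation, with an illustrative helper name:

    // What psubusb + psubusb + por compute per byte, before pmaddwd squares
    // and pairwise-adds the widened differences into 32-bit accumulators.
    typedef unsigned char uint8;
    typedef unsigned int uint32;

    uint32 SumSquareError_Scalar(const uint8* a, const uint8* b, int count) {
      uint32 sse = 0u;
      for (int i = 0; i < count; ++i) {
        // Saturating subtract both ways; exactly one result is nonzero.
        uint8 d0 = (a[i] > b[i]) ? static_cast<uint8>(a[i] - b[i]) : 0;
        uint8 d1 = (b[i] > a[i]) ? static_cast<uint8>(b[i] - a[i]) : 0;
        uint32 diff = d0 | d1;  // == |a[i] - b[i]|.
        sse += diff * diff;
      }
      return sse;
    }
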
-#if _MSC_VER >= 1700 -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - movdqa xmm6, kHash16x33 - - align 4 - wloop: - vpmovzxbd xmm3, dword ptr [eax] // src[0-3] - pmulld xmm0, xmm6 // hash *= 33 ^ 16 - vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] - pmulld xmm3, kHashMul0 - vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] - pmulld xmm4, kHashMul1 - vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] - pmulld xmm2, kHashMul2 - lea eax, [eax + 16] - pmulld xmm1, kHashMul3 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} -#endif // _MSC_VER >= 1700 - -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" +#include "libyuv/row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) + +__declspec(naked) __declspec(align(16)) +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + pxor xmm0, xmm0 + pxor xmm5, xmm5 + + align 4 + wloop: + movdqa xmm1, [eax] + lea eax, [eax + 16] + movdqa xmm2, [edx] + lea edx, [edx + 16] + sub ecx, 16 + movdqa xmm3, xmm1 // abs trick + psubusb xmm1, xmm2 + psubusb xmm2, xmm3 + por xmm1, xmm2 + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm5 + punpckhbw xmm2, xmm5 + pmaddwd xmm1, xmm1 + pmaddwd xmm2, xmm2 + paddd xmm0, xmm1 + paddd xmm0, xmm2 + jg wloop + + pshufd xmm1, xmm0, 0xee + paddd xmm0, xmm1 + pshufd xmm1, xmm0, 0x01 + paddd xmm0, xmm1 + movd eax, xmm0 + ret + } +} + +// Visual C 2012 required for AVX2. +#if _MSC_VER >= 1700 +// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. +#pragma warning(disable: 4752) +__declspec(naked) __declspec(align(16)) +uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + vpxor ymm0, ymm0, ymm0 // sum + vpxor ymm5, ymm5, ymm5 // constant 0 for unpck + sub edx, eax + + align 4 + wloop: + vmovdqu ymm1, [eax] + vmovdqu ymm2, [eax + edx] + lea eax, [eax + 32] + sub ecx, 32 + vpsubusb ymm3, ymm1, ymm2 // abs difference trick + vpsubusb ymm2, ymm2, ymm1 + vpor ymm1, ymm2, ymm3 + vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. + vpunpckhbw ymm1, ymm1, ymm5 + vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. + vpmaddwd ymm1, ymm1, ymm1 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm0, ymm0, ymm2 + jg wloop + + vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. + vpaddd ymm0, ymm0, ymm1 + vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. 
+ vpaddd ymm0, ymm0, ymm1 + vpermq ymm1, ymm0, 0x02 // high + low lane. + vpaddd ymm0, ymm0, ymm1 + vmovd eax, xmm0 + vzeroupper + ret + } +} +#endif // _MSC_VER >= 1700 + +#define HAS_HASHDJB2_SSE41 +static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +static uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 +}; +static uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 +}; +static uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 +}; +static uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 +}; + +// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 +// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 +// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 +// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 +// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 +#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ + _asm _emit 0x40 _asm _emit reg + +__declspec(naked) __declspec(align(16)) +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { + __asm { + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count + movd xmm0, [esp + 12] // seed + + pxor xmm7, xmm7 // constant 0 for unpck + movdqa xmm6, kHash16x33 + + align 4 + wloop: + movdqu xmm1, [eax] // src[0-15] + lea eax, [eax + 16] + pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 + movdqa xmm5, kHashMul0 + movdqa xmm2, xmm1 + punpcklbw xmm2, xmm7 // src[0-7] + movdqa xmm3, xmm2 + punpcklwd xmm3, xmm7 // src[0-3] + pmulld(0xdd) // pmulld xmm3, xmm5 + movdqa xmm5, kHashMul1 + movdqa xmm4, xmm2 + punpckhwd xmm4, xmm7 // src[4-7] + pmulld(0xe5) // pmulld xmm4, xmm5 + movdqa xmm5, kHashMul2 + punpckhbw xmm1, xmm7 // src[8-15] + movdqa xmm2, xmm1 + punpcklwd xmm2, xmm7 // src[8-11] + pmulld(0xd5) // pmulld xmm2, xmm5 + movdqa xmm5, kHashMul3 + punpckhwd xmm1, xmm7 // src[12-15] + pmulld(0xcd) // pmulld xmm1, xmm5 + paddd xmm3, xmm4 // add 16 results + paddd xmm1, xmm2 + sub ecx, 16 + paddd xmm1, xmm3 + + pshufd xmm2, xmm1, 0x0e // upper 2 dwords + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 0x01 + paddd xmm1, xmm2 + paddd xmm0, xmm1 + jg wloop + + movd eax, xmm0 // return hash + ret + } +} + +// Visual C 2012 required for AVX2. 
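
The kHash16x33 and kHashMul0..3 tables above encode the algebra that lets the SIMD paths consume 16 bytes per iteration: djb2 updates hash = hash * 33 + c per byte, so across a 16-byte block hash' = hash * 33^16 + src[0] * 33^15 + ... + src[15] * 33^0, with everything wrapping mod 2^32. A scalar sketch of that blocked form (illustrative, not part of the patch):

    typedef unsigned char uint8;
    typedef unsigned int uint32;

    // Equivalent to running HashDjb2_C over 16 bytes, but expressed the way
    // HashDjb2_SSE41/AVX2 evaluate it: one 33^16 scale plus 16 products.
    uint32 HashDjb2_Block16(const uint8* src, uint32 hash) {
      uint32 mul = 1u;
      uint32 sum = 0u;
      for (int i = 15; i >= 0; --i) {  // src[15] * 33^0 ... src[0] * 33^15.
        sum += src[i] * mul;
        mul *= 33u;
      }
      return hash * mul + sum;  // mul is now 33^16, the kHash16x33 constant.
    }
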
+#if _MSC_VER >= 1700 +__declspec(naked) __declspec(align(16)) +uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { + __asm { + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count + movd xmm0, [esp + 12] // seed + movdqa xmm6, kHash16x33 + + align 4 + wloop: + vpmovzxbd xmm3, dword ptr [eax] // src[0-3] + pmulld xmm0, xmm6 // hash *= 33 ^ 16 + vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] + pmulld xmm3, kHashMul0 + vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] + pmulld xmm4, kHashMul1 + vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] + pmulld xmm2, kHashMul2 + lea eax, [eax + 16] + pmulld xmm1, kHashMul3 + paddd xmm3, xmm4 // add 16 results + paddd xmm1, xmm2 + sub ecx, 16 + paddd xmm1, xmm3 + pshufd xmm2, xmm1, 0x0e // upper 2 dwords + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 0x01 + paddd xmm1, xmm2 + paddd xmm0, xmm1 + jg wloop + + movd eax, xmm0 // return hash + ret + } +} +#endif // _MSC_VER >= 1700 + +#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c729dab28..9d5752cbb 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -49,17 +49,17 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, ARGBToYRow_C; void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int pix) = ARGBToUV444Row_C; -#if defined(HAS_ARGBTOUV444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUV444Row = ARGBToUV444Row_SSSE3; - } - } - } -#endif +#if defined(HAS_ARGBTOUV444ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUV444Row = ARGBToUV444Row_SSSE3; + } + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { ARGBToYRow = ARGBToYRow_Any_SSSE3; diff --git a/source/row_x86.asm b/source/row_x86.asm index 56188f068..0cb326f8e 100644 --- a/source/row_x86.asm +++ b/source/row_x86.asm @@ -28,7 +28,7 @@ cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix psrlw m2, m2, 8 %endif - align 4 + ALIGN 4 .convertloop: mov%2 m0, [src_yuy2q] mov%2 m1, [src_yuy2q + mmsize] @@ -74,7 +74,7 @@ cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix psrlw m4, m4, 8 sub dst_vq, dst_uq - align 4 + ALIGN 4 .convertloop: mov%1 m0, [src_uvq] mov%1 m1, [src_uvq + mmsize] @@ -113,7 +113,7 @@ SplitUVRow a, cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix sub src_vq, src_uq - align 4 + ALIGN 4 .convertloop: mov%1 m0, [src_uq] mov%1 m1, [src_vq] diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index f35542531..10bdd2c8c 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1,995 +1,995 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include - -#include "libyuv/compare.h" -#include "libyuv/convert.h" -#include "libyuv/convert_argb.h" -#include "libyuv/convert_from.h" -#include "libyuv/convert_from_argb.h" -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#ifdef HAVE_JPEG -#include "libyuv/mjpeg_decoder.h" -#endif -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" -#include "../unit_test/unit_test.h" - -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else // __GNUC__ -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif - -namespace libyuv { - -#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) - -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_u, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(src_v, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_u_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_u_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - } \ - } \ - MaskCpuFlags(0); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 0); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_u_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_u_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = 
abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 3); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_v_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_v_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 3); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_u_c) \ - free_aligned_buffer_64(dst_v_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_u_opt) \ - free_aligned_buffer_64(dst_v_opt) \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_u) \ - free_aligned_buffer_64(src_v) \ -} - -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTPLANARTOP(I420, 2, 2, I420, 2, 2) -TESTPLANARTOP(I422, 2, 1, I420, 2, 2) -TESTPLANARTOP(I444, 1, 1, I420, 2, 2) -TESTPLANARTOP(I411, 4, 1, I420, 2, 2) -TESTPLANARTOP(I420, 2, 2, I422, 2, 1) -TESTPLANARTOP(I420, 2, 2, I444, 1, 1) -TESTPLANARTOP(I420, 2, 2, I411, 4, 1) -TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) -TESTPLANARTOP(I422, 2, 1, I422, 2, 1) -TESTPLANARTOP(I444, 1, 1, I444, 1, 1) - -#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = benchmark_height_; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_u, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(src_v, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - } \ - } \ - MaskCpuFlags(0); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_opt, kWidth, \ - dst_uv_opt, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_uv_c[i * \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \ - static_cast(dst_uv_opt[i * \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_uv_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_uv_opt) \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_u) \ - free_aligned_buffer_64(src_v) \ -} - -#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2) -TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2) - -#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_u_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_u_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - } \ - } \ - MaskCpuFlags(0); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_uv + OFF, \ - 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_uv + OFF, \ - 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_u_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_u_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_v_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_v_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_u_c) \ - free_aligned_buffer_64(dst_v_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_u_opt) \ - free_aligned_buffer_64(dst_v_opt) \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_uv) \ -} - -#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, 
SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2) -TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) - -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN)) - -#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \ -TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kSizeUV = \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_u, kSizeUV + OFF); \ - align_buffer_64(src_v, kSizeUV + OFF); \ - align_buffer_64(dst_argb_c, kStrideB * kHeight); \ - align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ - memset(dst_argb_c, 0, kStrideB * kHeight); \ - memset(dst_argb_opt, 0, kStrideB * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (random() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (random() & 0xff); \ - src_v[i + OFF] = (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_argb_c, kStrideB, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_argb_opt, kStrideB, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ - align_buffer_64(dst_argb32_c, kWidth * BPP_C * kHeight); \ - align_buffer_64(dst_argb32_opt, kWidth * BPP_C * kHeight); \ - memset(dst_argb32_c, 0, kWidth * BPP_C * kHeight); \ - memset(dst_argb32_opt, 0, kWidth * BPP_C * kHeight); \ - FMT_B##To##FMT_C(dst_argb_c, kStrideB, \ - dst_argb32_c, kWidth * BPP_C , \ - kWidth, kHeight); \ - FMT_B##To##FMT_C(dst_argb_opt, kStrideB, \ - dst_argb32_opt, kWidth * BPP_C , \ - kWidth, kHeight); \ - for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ - int abs_diff = \ - abs(static_cast(dst_argb32_c[i]) - \ - static_cast(dst_argb32_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_u) \ - free_aligned_buffer_64(src_v) \ - free_aligned_buffer_64(dst_argb_c) \ - free_aligned_buffer_64(dst_argb_opt) \ - free_aligned_buffer_64(dst_argb32_c) \ - free_aligned_buffer_64(dst_argb32_opt) \ -} - -#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, DIFF, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C) - -// TODO(fbarchard): Make vertical alignment unnecessary on bayer. -TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1, 2, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1, 2, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1, 2, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1, 2, 2, 2, ARGB, 4) - -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - W1280, DIFF, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = benchmark_height_; \ - const int kStrideB = kWidth * BPP_B; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_uv, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ - align_buffer_64(dst_argb_c, kStrideB * kHeight); \ - align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ - src_uv[(i * SUBSAMPLE(kWidth, SUBSAMP_X)) * 2 + j + OFF] = \ - (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - dst_argb_c, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - dst_argb_opt, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - } \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ - align_buffer_64(dst_argb32_c, kWidth * 4 * kHeight); \ - align_buffer_64(dst_argb32_opt, kWidth * 4 * kHeight); \ - memset(dst_argb32_c, 1, kWidth * 4 * kHeight); \ - memset(dst_argb32_opt, 2, kWidth * 4 * kHeight); \ - FMT_B##ToARGB(dst_argb_c, kStrideB, \ - dst_argb32_c, kWidth * 4, \ - kWidth, kHeight); \ - FMT_B##ToARGB(dst_argb_opt, kStrideB, \ - dst_argb32_opt, kWidth * 4, \ - kWidth, kHeight); \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth * 4; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_argb32_c[i * kWidth * 4 + j]) - \ - static_cast(dst_argb32_opt[i * kWidth * 4 + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_uv) \ - free_aligned_buffer_64(dst_argb_c) \ - free_aligned_buffer_64(dst_argb_opt) \ - free_aligned_buffer_64(dst_argb32_c) \ - free_aligned_buffer_64(dst_argb32_opt) \ -} - -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Opt, +, 0) - -TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) -TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9) - -#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, DIFF, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStride = \ - (SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_64(src_argb, kStride * kHeight + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_u_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_u_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_c, 1, kWidth * kHeight); \ - memset(dst_u_c, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 2, kWidth * kHeight); \ - memset(dst_u_opt, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_u_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_u_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_v_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_v_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_u_c) \ - free_aligned_buffer_64(dst_v_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_u_opt) \ - free_aligned_buffer_64(dst_v_opt) \ - free_aligned_buffer_64(src_argb) \ -} - -#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - DIFF) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, 
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Opt, +, 0) - -TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) -#ifdef __arm__ -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) -#else -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) -#endif -TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) -// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) -TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) -TESTATOPLANAR(BayerBGGR, 1, 2, I420, 2, 2, 4) -TESTATOPLANAR(BayerRGGB, 1, 2, I420, 2, 2, 4) -TESTATOPLANAR(BayerGBRG, 1, 2, I420, 2, 2, 4) -TESTATOPLANAR(BayerGRBG, 1, 2, I420, 2, 2, 4) - -#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \ - align_buffer_64(src_argb, kStride * kHeight + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_uv_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_uv_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_opt, kWidth, \ - dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 4); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_uv_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ - static_cast(dst_uv_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 4); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_uv_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_uv_opt) \ - free_aligned_buffer_64(src_argb) \ -} - -#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2) -TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2) - -#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - W1280, DIFF, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_64(src_argb, kStrideA * kHeight + OFF); \ - align_buffer_64(dst_argb_c, kStrideB * kHeight); \ - align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ - memset(dst_argb_c, 0, kStrideB * kHeight); \ - memset(dst_argb_opt, 0, kStrideB * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kStrideA * kHeight; ++i) { \ - src_argb[i + OFF] = (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ - dst_argb_c, kStrideB, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ - dst_argb_opt, kStrideB, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kStrideB * kHeight; ++i) { \ - int abs_diff = \ - abs(static_cast(dst_argb_c[i]) - \ - static_cast(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(src_argb) \ - free_aligned_buffer_64(dst_argb_c) \ - free_aligned_buffer_64(dst_argb_opt) \ -} - -#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ - FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \ - srandom(time(NULL)); \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (random() & 63) + 1; \ - const int kHeight = (random() & 31) + 1; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\ - const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\ - align_buffer_page_end(src_argb, kStrideA * kHeightA); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ - memset(dst_argb_c, 0, kStrideB * kHeightB); \ - memset(dst_argb_opt, 0, kStrideB * kHeightB); \ - for (int i = 0; i < kStrideA * kHeightA; ++i) { \ - src_argb[i] = (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_B(src_argb, kStrideA, \ - dst_argb_c, kStrideB, \ - kWidth, kHeight); \ - MaskCpuFlags(-1); \ - FMT_A##To##FMT_B(src_argb, kStrideA, \ - dst_argb_opt, kStrideB, \ - kWidth, kHeight); \ - int max_diff = 0; \ - for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - int abs_diff = \ - abs(static_cast(dst_argb_c[i]) - \ - static_cast(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_page_end(src_argb) \ - free_aligned_buffer_page_end(dst_argb_c) \ - 
free_aligned_buffer_page_end(dst_argb_opt) \ - } \ -} - -#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ - FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_, DIFF, _Opt, +, 0) \ - TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ - FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) - -TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) -TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) -TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4) -TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4) -TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) -TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) - -TEST_F(libyuvTest, Test565) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 pixels565[256][2]); - - for (int i = 0; i < 256; ++i) { - for (int j = 0; j < 4; ++j) { - orig_pixels[i][j] = i; - } - } - ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); - uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); - EXPECT_EQ(610919429u, checksum); -} - -#ifdef HAVE_JPEG -TEST_F(libyuvTest, ValidateJpeg) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? - benchmark_width_ * benchmark_height_ : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_64(orig_pixels, kSize); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kSize); - - // EOI, SOI. Expect pass. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. 
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
-  }
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(libyuvTest, InvalidateJpeg) {
-  const int kOff = 10;
-  const int kMinJpeg = 64;
-  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ?
-      benchmark_width_ * benchmark_height_ : kMinJpeg;
-  const int kSize = kImageSize + kOff;
-  align_buffer_64(orig_pixels, kSize);
-
-  // No SOI or EOI. Expect fail.
-  memset(orig_pixels, 0, kSize);
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  // SOI but no EOI. Expect fail.
-  orig_pixels[0] = 0xff;
-  orig_pixels[1] = 0xd8;  // SOI.
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-  }
-  // EOI but no SOI. Expect fail.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 0;
-  orig_pixels[kSize - kOff + 0] = 0xff;
-  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-#endif
-
-}  // namespace libyuv
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "libyuv/compare.h"
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+#include "libyuv/convert_from.h"
+#include "libyuv/convert_from_argb.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "../unit_test/unit_test.h"
+
+#if defined(_MSC_VER)
+#define SIMD_ALIGNED(var) __declspec(align(16)) var
+#else  // __GNUC__
+#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+#endif
+
+namespace libyuv {
+
+#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
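+
+// SUBSAMPLE is ceiling division: it sizes the chroma planes so an odd
+// dimension still gets a full final sample.  For example,
+// SUBSAMPLE(15, 2) = (15 + 1) / 2 = 8, so a 15-pixel-wide I420 row
+// carries 8 U and 8 V samples rather than truncating to 7.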
+
+#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                       FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+      src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+      src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  } \
+  MaskCpuFlags(0); \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                 src_u + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 src_v + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 dst_y_c, kWidth, \
+                                 dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                   src_u + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   src_v + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   dst_y_opt, kWidth, \
+                                   dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 0); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 3); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 3); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_u_c) \
+  free_aligned_buffer_64(dst_v_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_u_opt) \
+  free_aligned_buffer_64(dst_v_opt) \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_u) \
+  free_aligned_buffer_64(src_v) \
+}
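+
+// Every worker macro in this file follows the pattern above: MaskCpuFlags(0)
+// restricts libyuv to its portable C paths to produce a reference result,
+// MaskCpuFlags(-1) then re-enables all detected SIMD paths for the timed
+// loop, and the two outputs are compared byte by byte against a small
+// per-plane tolerance (here 0 for Y and 3 for U/V).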
+
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                      FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_ - 4, _Any, +, 0) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, _Unaligned, +, 1) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, _Invert, -, 0) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, _Opt, +, 0)
+
+TESTPLANARTOP(I420, 2, 2, I420, 2, 2)
+TESTPLANARTOP(I422, 2, 1, I420, 2, 2)
+TESTPLANARTOP(I444, 1, 1, I420, 2, 2)
+TESTPLANARTOP(I411, 4, 1, I420, 2, 2)
+TESTPLANARTOP(I420, 2, 2, I422, 2, 1)
+TESTPLANARTOP(I420, 2, 2, I444, 1, 1)
+TESTPLANARTOP(I420, 2, 2, I411, 4, 1)
+TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2)
+TESTPLANARTOP(I422, 2, 1, I422, 2, 1)
+TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
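+
+// For example, TESTPLANARTOP(I422, 2, 1, I420, 2, 2) above expands into
+// four gtest cases: I422ToI420_Any (width reduced by 4 to hit the
+// any-width fallback), I422ToI420_Unaligned (source pointers offset by one
+// byte), I422ToI420_Invert (negative height, i.e. bottom-up images) and
+// I422ToI420_Opt (the aligned, timed case).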
+
+#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+      src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+      src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  } \
+  MaskCpuFlags(0); \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                 src_u + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 src_v + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 dst_y_c, kWidth, \
+                                 dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), \
+                                 kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                   src_u + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   src_v + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   dst_y_opt, kWidth, \
+                                   dst_uv_opt, \
+                                   SUBSAMPLE(kWidth * 2, SUBSAMP_X), \
+                                   kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_uv_c[i * \
+                               SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_uv_opt[i * \
+                               SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_uv_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_uv_opt) \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_u) \
+  free_aligned_buffer_64(src_v) \
+}
+
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                       FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_ - 4, _Any, +, 0) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_, _Unaligned, +, 1) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_, _Invert, -, 0) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_, _Opt, +, 0)
+
+TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2)
+TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
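+
+// NV12 and NV21 store chroma as a single interleaved UV (or VU) plane, so
+// the biplanar destination above is sized and strided as
+// SUBSAMPLE(kWidth * 2, SUBSAMP_X): half the luma width, two bytes per
+// chroma sample.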
+
+#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+      src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  } \
+  MaskCpuFlags(0); \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                 src_uv + OFF, \
+                                 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 dst_y_c, kWidth, \
+                                 dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                   src_uv + OFF, \
+                                   2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   dst_y_opt, kWidth, \
+                                   dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_u_c) \
+  free_aligned_buffer_64(dst_v_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_u_opt) \
+  free_aligned_buffer_64(dst_v_opt) \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_uv) \
+}
+
+#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_ - 4, _Any, +, 0) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Unaligned, +, 1) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Invert, -, 0) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Opt, +, 0)
+
+TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
+TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
+
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN))
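+
+// ALIGNINT rounds up to a multiple of ALIGN: ALIGNINT(17, 4) =
+// (17 + 3) / 4 * 4 = 20.  It is used below to round heights and RGB
+// strides up to the alignment each packed format requires.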
+
+#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \
+TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+  const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+  const int kSizeUV = \
+      SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_u, kSizeUV + OFF); \
+  align_buffer_64(src_v, kSizeUV + OFF); \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight); \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
+  memset(dst_argb_c, 0, kStrideB * kHeight); \
+  memset(dst_argb_opt, 0, kStrideB * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kWidth * kHeight; ++i) { \
+    src_y[i + OFF] = (random() & 0xff); \
+  } \
+  for (int i = 0; i < kSizeUV; ++i) { \
+    src_u[i + OFF] = (random() & 0xff); \
+    src_v[i + OFF] = (random() & 0xff); \
+  } \
+  MaskCpuFlags(0); \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                        src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        dst_argb_c, kStrideB, \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                          src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          dst_argb_opt, kStrideB, \
+                          kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
+  align_buffer_64(dst_argb32_c, kWidth * BPP_C * kHeight); \
+  align_buffer_64(dst_argb32_opt, kWidth * BPP_C * kHeight); \
+  memset(dst_argb32_c, 0, kWidth * BPP_C * kHeight); \
+  memset(dst_argb32_opt, 0, kWidth * BPP_C * kHeight); \
+  FMT_B##To##FMT_C(dst_argb_c, kStrideB, \
+                   dst_argb32_c, kWidth * BPP_C, \
+                   kWidth, kHeight); \
+  FMT_B##To##FMT_C(dst_argb_opt, kStrideB, \
+                   dst_argb32_opt, kWidth * BPP_C, \
+                   kWidth, kHeight); \
+  for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
+    int abs_diff = abs(static_cast<int>(dst_argb32_c[i]) - \
+                       static_cast<int>(dst_argb32_opt[i])); \
+    if (abs_diff > max_diff) { \
+      max_diff = abs_diff; \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_u) \
+  free_aligned_buffer_64(src_v) \
+  free_aligned_buffer_64(dst_argb_c) \
+  free_aligned_buffer_64(dst_argb_opt) \
+  free_aligned_buffer_64(dst_argb32_c) \
+  free_aligned_buffer_64(dst_argb32_opt) \
+}
+
+#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                      YALIGN, DIFF, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
+
+// TODO(fbarchard): Make vertical alignment unnecessary on bayer.
+TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1, 2, 2, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1, 2, 2, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1, 2, 2, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1, 2, 2, 2, ARGB, 4)
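+
+// The DIFF argument above scales with how lossy the packed format is once
+// expanded back to ARGB for comparison: exact 3- and 4-byte formats use 2,
+// RGB565 and ARGB1555 allow 9, and ARGB4444, which keeps only 4 bits per
+// channel, allows 17.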
+
+#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                         W1280, DIFF, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  const int kStrideB = kWidth * BPP_B; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_uv, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight); \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
+      src_uv[(i * SUBSAMPLE(kWidth, SUBSAMP_X)) * 2 + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  MaskCpuFlags(0); \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                        src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                        dst_argb_c, kWidth * BPP_B, \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                          src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                          dst_argb_opt, kWidth * BPP_B, \
+                          kWidth, NEG kHeight); \
+  } \
+  /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
+  align_buffer_64(dst_argb32_c, kWidth * 4 * kHeight); \
+  align_buffer_64(dst_argb32_opt, kWidth * 4 * kHeight); \
+  memset(dst_argb32_c, 1, kWidth * 4 * kHeight); \
+  memset(dst_argb32_opt, 2, kWidth * 4 * kHeight); \
+  FMT_B##ToARGB(dst_argb_c, kStrideB, \
+                dst_argb32_c, kWidth * 4, \
+                kWidth, kHeight); \
+  FMT_B##ToARGB(dst_argb_opt, kStrideB, \
+                dst_argb32_opt, kWidth * 4, \
+                kWidth, kHeight); \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth * 4; ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_argb32_c[i * kWidth * 4 + j]) - \
+              static_cast<int>(dst_argb32_opt[i * kWidth * 4 + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_uv) \
+  free_aligned_buffer_64(dst_argb_c) \
+  free_aligned_buffer_64(dst_argb_opt) \
+  free_aligned_buffer_64(dst_argb32_c) \
+  free_aligned_buffer_64(dst_argb32_opt) \
+}
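+
+// The two ARGB scratch buffers above are deliberately memset to different
+// fill values (1 and 2) before the comparison conversion, so any byte a
+// broken conversion fails to write cannot accidentally compare as equal.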
+
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_ - 4, DIFF, _Any, +, 0) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_, DIFF, _Opt, +, 0)
+
+TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
+TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
+TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
+
+#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                       W1280, DIFF, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+  const int kStride = \
+      (SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
+  align_buffer_64(src_argb, kStride * kHeight + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_y_c, 1, kWidth * kHeight); \
+  memset(dst_u_c, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_v_c, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_y_opt, 2, kWidth * kHeight); \
+  memset(dst_u_opt, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_v_opt, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kStride; ++j) \
+      src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
+  MaskCpuFlags(0); \
+  FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                        dst_y_c, kWidth, \
+                        dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                          dst_y_opt, kWidth, \
+                          dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_u_c) \
+  free_aligned_buffer_64(dst_v_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_u_opt) \
+  free_aligned_buffer_64(dst_v_opt) \
+  free_aligned_buffer_64(src_argb) \
+}
+
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                      DIFF) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_ - 4, DIFF, _Any, +, 0) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, DIFF, _Opt, +, 0)
+
+TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
+#ifdef __arm__
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4)
+#else
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0)
+#endif
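+
+// J420 is the JPEG (full-range) variant of I420; the NEON path appears to
+// round slightly differently from the C code, hence the looser tolerance
+// of 4 on __arm__ and an exact match everywhere else.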
+TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
+// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
+TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
+TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4)
+TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
+TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
+TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
+TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
+TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
+TESTATOPLANAR(BayerBGGR, 1, 2, I420, 2, 2, 4)
+TESTATOPLANAR(BayerRGGB, 1, 2, I420, 2, 2, 4)
+TESTATOPLANAR(BayerGBRG, 1, 2, I420, 2, 2, 4)
+TESTATOPLANAR(BayerGRBG, 1, 2, I420, 2, 2, 4)
+
+#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                         W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
+  align_buffer_64(src_argb, kStride * kHeight + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kStride; ++j) \
+      src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
+  MaskCpuFlags(0); \
+  FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                        dst_y_c, kWidth, \
+                        dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                          dst_y_opt, kWidth, \
+                          dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                          kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 4); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_uv_c[i * \
+                               SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
+              static_cast<int>(dst_uv_opt[i * \
+                               SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 4); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_uv_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_uv_opt) \
+  free_aligned_buffer_64(src_argb) \
+}
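+
+// ARGB to NV12/NV21 shares the Y tolerance of 4 with the planar tests
+// above; the interleaved UV plane is checked at the same tolerance over
+// SUBSAMPLE(kWidth, SUBSAMP_X) * 2 bytes per row.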
+
+#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_ - 4, _Any, +, 0) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Unaligned, +, 1) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Invert, -, 0) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Opt, +, 0)
+
+TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2)
+TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2)
+
+#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+                  W1280, DIFF, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+  const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+  align_buffer_64(src_argb, kStrideA * kHeight + OFF); \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight); \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
+  memset(dst_argb_c, 0, kStrideB * kHeight); \
+  memset(dst_argb_opt, 0, kStrideB * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kStrideA * kHeight; ++i) { \
+    src_argb[i + OFF] = (random() & 0xff); \
+  } \
+  MaskCpuFlags(0); \
+  FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
+                   dst_argb_c, kStrideB, \
+                   kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
+                     dst_argb_opt, kStrideB, \
+                     kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kStrideB * kHeight; ++i) { \
+    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
+                       static_cast<int>(dst_argb_opt[i])); \
+    if (abs_diff > max_diff) { \
+      max_diff = abs_diff; \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(src_argb) \
+  free_aligned_buffer_64(dst_argb_c) \
+  free_aligned_buffer_64(dst_argb_opt) \
+}
+
+#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                       FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
+  srandom(time(NULL)); \
+  for (int times = 0; times < benchmark_iterations_; ++times) { \
+    const int kWidth = (random() & 63) + 1; \
+    const int kHeight = (random() & 31) + 1; \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+    const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+    const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+    align_buffer_page_end(src_argb, kStrideA * kHeightA); \
+    align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
+    align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
+    memset(dst_argb_c, 0, kStrideB * kHeightB); \
+    memset(dst_argb_opt, 0, kStrideB * kHeightB); \
+    for (int i = 0; i < kStrideA * kHeightA; ++i) { \
+      src_argb[i] = (random() & 0xff); \
+    } \
+    MaskCpuFlags(0); \
+    FMT_A##To##FMT_B(src_argb, kStrideA, \
+                     dst_argb_c, kStrideB, \
+                     kWidth, kHeight); \
+    MaskCpuFlags(-1); \
+    FMT_A##To##FMT_B(src_argb, kStrideA, \
+                     dst_argb_opt, kStrideB, \
+                     kWidth, kHeight); \
+    int max_diff = 0; \
+    for (int i = 0; i < kStrideB * kHeightB; ++i) { \
+      int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
+                         static_cast<int>(dst_argb_opt[i])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+    EXPECT_LE(max_diff, DIFF); \
+    free_aligned_buffer_page_end(src_argb) \
+    free_aligned_buffer_page_end(dst_argb_c) \
+    free_aligned_buffer_page_end(dst_argb_opt) \
+  } \
+}
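+
+// The _Random variant fuzzes geometry rather than data: each benchmark
+// iteration draws a fresh width in [1, 64] and height in [1, 32]
+// ((random() & 63) + 1 and (random() & 31) + 1), exercising the odd and
+// narrow sizes the fixed-size tests above never hit.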
+
+#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                 FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_ - 4, DIFF, _Any, +, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Opt, +, 0) \
+    TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                   FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF)
+
+TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
+TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
+TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
+TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4)
+TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4)
+TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
+TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
+
+TEST_F(libyuvTest, Test565) {
+  SIMD_ALIGNED(uint8 orig_pixels[256][4]);
+  SIMD_ALIGNED(uint8 pixels565[256][2]);
+
+  for (int i = 0; i < 256; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      orig_pixels[i][j] = i;
+    }
+  }
+  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
+  uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+  EXPECT_EQ(610919429u, checksum);
+}
+
+#ifdef HAVE_JPEG
+TEST_F(libyuvTest, ValidateJpeg) {
+  const int kOff = 10;
+  const int kMinJpeg = 64;
+  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ?
+      benchmark_width_ * benchmark_height_ : kMinJpeg;
+  const int kSize = kImageSize + kOff;
+  align_buffer_64(orig_pixels, kSize);
+
+  // No SOI or EOI. Expect fail.
+  memset(orig_pixels, 0, kSize);
+
+  // EOI, SOI. Expect pass.
+  orig_pixels[0] = 0xff;
+  orig_pixels[1] = 0xd8;  // SOI.
+  orig_pixels[kSize - kOff + 0] = 0xff;
+  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
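+  // 0xff 0xd8 is the JPEG start-of-image (SOI) marker and 0xff 0xd9 the
+  // end-of-image (EOI) marker; ValidateJpeg accepts this buffer on the
+  // strength of those two markers alone, even though it holds no image data.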
+  for (int times = 0; times < benchmark_iterations_; ++times) {
+    EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
+  }
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(libyuvTest, InvalidateJpeg) {
+  const int kOff = 10;
+  const int kMinJpeg = 64;
+  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ?
+      benchmark_width_ * benchmark_height_ : kMinJpeg;
+  const int kSize = kImageSize + kOff;
+  align_buffer_64(orig_pixels, kSize);
+
+  // No SOI or EOI. Expect fail.
+  memset(orig_pixels, 0, kSize);
+  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+  // SOI but no EOI. Expect fail.
+  orig_pixels[0] = 0xff;
+  orig_pixels[1] = 0xd8;  // SOI.
+  for (int times = 0; times < benchmark_iterations_; ++times) {
+    EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+  }
+  // EOI but no SOI. Expect fail.
+  orig_pixels[0] = 0;
+  orig_pixels[1] = 0;
+  orig_pixels[kSize - kOff + 0] = 0xff;
+  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
+  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+#endif
+
+}  // namespace libyuv