From bdf7cb591452611090922e690d5104a7d8c6b1e5 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Mon, 5 Nov 2012 23:40:11 +0000 Subject: [PATCH] RGB formats converted to YUV with Neon BUG=none TEST=convert_test Review URL: https://webrtc-codereview.appspot.com/936013 git-svn-id: http://libyuv.googlecode.com/svn/trunk@471 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/convert.h | 64 +- include/libyuv/convert_from_argb.h | 87 ++- include/libyuv/row.h | 1058 +++++++++++++++------------- include/libyuv/scale.h | 4 +- include/libyuv/version.h | 2 +- source/convert.cc | 441 +++++++----- source/convert_argb.cc | 55 +- source/convert_from.cc | 94 +-- source/convert_from_argb.cc | 586 +++++++++++++-- source/format_conversion.cc | 16 +- source/planar_functions.cc | 2 +- source/rotate.cc | 12 +- source/row_any.cc | 67 +- source/row_common.cc | 529 ++++++++------ source/row_mips.cc | 6 +- source/row_neon.cc | 228 +++++- source/row_posix.cc | 156 +++- source/row_win.cc | 156 +++- unit_test/convert_test.cc | 242 ++++--- 20 files changed, 2528 insertions(+), 1279 deletions(-) diff --git a/README.chromium b/README.chromium index 1d368327b..0b383abbe 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 470 +Version: 471 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index e07bfd199..da1a7e6e9 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -22,22 +22,9 @@ namespace libyuv { extern "C" { #endif -// Alias. -#define I420ToI420 I420Copy - -// Copy I420 to I420. +// Convert I444 to I420. LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I422 to I420. -LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, +int I444ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, @@ -45,9 +32,9 @@ int I422ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert I444 to I420. +// Convert I422 to I420. LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, +int I422ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, @@ -65,6 +52,17 @@ int I411ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); +// Copy I420 to I420. +#define I420ToI420 I420Copy +LIBYUV_API +int I420Copy(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Convert I400 (grey) to I420. LIBYUV_API int I400ToI420(const uint8* src_y, int src_stride_y, @@ -91,6 +89,22 @@ int NV21ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); +// Convert YUY2 to I420. +LIBYUV_API +int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert UYVY to I420. 
+LIBYUV_API +int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Convert M420 to I420. LIBYUV_API int M420ToI420(const uint8* src_m420, int src_stride_m420, @@ -108,22 +122,6 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert YUY2 to I420. -LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert UYVY to I420. -LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Convert V210 to I420. LIBYUV_API int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy, diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index 41cfcb7e6..0aa23f438 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -18,15 +18,27 @@ namespace libyuv { extern "C" { #endif -// Alias. -#define ARGBToARGB ARGBCopy - // Copy ARGB to ARGB. +#define ARGBToARGB ARGBCopy LIBYUV_API int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); +// Convert ARGB To BGRA. (alias) +#define ARGBToBGRA BGRAToARGB +LIBYUV_API +int BGRAToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To ABGR. (alias) +#define ARGBToABGR ABGRToARGB +LIBYUV_API +int ABGRToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + // Convert ARGB To RGBA. LIBYUV_API int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, @@ -63,34 +75,75 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, uint8* dst_argb4444, int dst_stride_argb4444, int width, int height); +// Convert ARGB To I444. +LIBYUV_API +int ARGBToI444(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I422. +LIBYUV_API +int ARGBToI422(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I420. (also in convert.h) +LIBYUV_API +int ARGBToI420(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I411. +LIBYUV_API +int ARGBToI411(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // Convert ARGB to I400. LIBYUV_API int ARGBToI400(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, int width, int height); -// ARGB little endian (bgra in memory) to I422. +// Convert ARGB To NV12. 
LIBYUV_API -int ARGBToI422(const uint8* src_frame, int src_stride_frame, +int ARGBToNV12(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, + uint8* dst_uv, int dst_stride_uv, int width, int height); -// Aliases. -#define ARGBToBGRA BGRAToARGB -#define ARGBToABGR ABGRToARGB - -// BGRA little endian (argb in memory) to ARGB. +// Convert ARGB To NV21. LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, int width, int height); -// ABGR little endian (rgba in memory) to ARGB. +// Convert ARGB To NV21. LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, + int width, int height); + +// Convert ARGB To YUY2. +LIBYUV_API +int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, + uint8* dst_yuy2, int dst_stride_yuy2, + int width, int height); + +// Convert ARGB To UYVY. +LIBYUV_API +int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, + uint8* dst_uyvy, int dst_stride_uyvy, int width, int height); #ifdef __cplusplus diff --git a/include/libyuv/row.h b/include/libyuv/row.h index cb574ece0..b7bb7dafe 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -49,6 +49,7 @@ extern "C" { #define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTORGBAROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 +#define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 #define HAS_BGRATOARGBROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 @@ -66,7 +67,7 @@ extern "C" { #define HAS_I422TORGBAROW_SSSE3 #define HAS_I444TOARGBROW_SSSE3 #define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORROWUV_SSSE3 +#define HAS_MirrorUVRow_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 @@ -94,6 +95,8 @@ extern "C" { #define HAS_I422TORGB565ROW_SSSE3 #define HAS_YUY2TOARGBROW_SSSE3 #define HAS_UYVYTOARGBROW_SSSE3 +#define HAS_RGB24TOYROW_SSSE3 +#define HAS_RAWTOYROW_SSSE3 // Effects #define HAS_ARGBAFFINEROW_SSE2 @@ -162,7 +165,7 @@ extern "C" { #define HAS_I422TORGB565ROW_NEON #define HAS_I422TORGBAROW_NEON #define HAS_MIRRORROW_NEON -#define HAS_MIRRORROWUV_NEON +#define HAS_MirrorUVRow_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV21TOARGBROW_NEON #define HAS_YUY2TOARGBROW_NEON @@ -185,11 +188,20 @@ extern "C" { #define HAS_ARGBTORGB565ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON -#define HAS_ARGBTOYROW_NEON #define HAS_MERGEUV_NEON #define HAS_YTOARGBROW_NEON #define HAS_I444TOARGBROW_NEON #define HAS_I411TOARGBROW_NEON +#define HAS_ARGBTOYROW_NEON +#define HAS_BGRATOYROW_NEON +#define HAS_ABGRTOYROW_NEON +#define HAS_RGBATOYROW_NEON +#define HAS_RGB24TOYROW_NEON +#define HAS_RAWTOYROW_NEON +#define HAS_RGB565TOARGBROW_NEON +#define HAS_ARGB1555TOARGBROW_NEON +#define HAS_ARGB4444TOARGBROW_NEON +#define HAS_RGB565TOYROW_NEON #endif // The following are available on Mips platforms @@ -198,7 +210,7 @@ extern "C" { #if defined(__mips_dsp) && (__mips_dsp_rev >= 2) #define HAS_SPLITUV_MIPS_DSPR2 #define HAS_MIRRORROW_MIPS_DSPR2 -#define HAS_MIRRORROWUV_MIPS_DSPR2 +#define HAS_MirrorUVRow_MIPS_DSPR2 #define HAS_I422TOARGBROW_MIPS_DSPR2 #define HAS_I422TOBGRAROW_MIPS_DSPR2 #define 
HAS_I422TOABGRROW_MIPS_DSPR2 @@ -237,123 +249,185 @@ typedef uint32 uvec32[4]; #define OMITFP __attribute__((optimize("omit-frame-pointer"))) #endif -void I444ToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I444ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGBARow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToRGB24Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, int width); -void I422ToRAWRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, int width); -void I422ToARGB4444Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB565Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb565, + int width); +void I422ToARGB1555Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb1555, int width); -void I422ToARGB1555Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB4444Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, int width); -void I422ToRGB565Row_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); -void NV12ToARGBRow_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV12ToARGBRow_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV21ToARGBRow_NEON(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV12ToRGB565Row_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_rgb565, int width); -void NV21ToRGB565Row_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, +void NV21ToRGB565Row_NEON(const uint8* src_y, + const uint8* src_vu, + uint8* dst_rgb565, int width); -void YUY2ToARGBRow_NEON(const uint8* yuy2_buf, - uint8* rgb_buf, +void YUY2ToARGBRow_NEON(const uint8* 
src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_NEON(const uint8* uyvy_buf, - uint8* rgb_buf, +void UYVYToARGBRow_NEON(const uint8* src_uyvy, + uint8* dst_argb, int width); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); - -void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - +void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); +void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); +void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* 
dst_y, int pix); +void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); +void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix); +void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix); +void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); +void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); +void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); +void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); + +void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, + uint8* dst_u, uint8* dst_v, int width); + +void ARGBToUV422Row_SSSE3(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); + +void ARGBToUV444Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV422Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUV411Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width); -void MirrorRowUV_MIPS_DSPR2(const uint8* src, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_MIPS_DSPR2(const 
uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); -void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width); -void MirrorRowUV_NEON(const uint8* src, uint8* dst_u, uint8* dst_v, int width); -void MirrorRowUV_C(const uint8* src, uint8* dst_u, uint8* dst_v, int width); +void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + int width); void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); @@ -414,43 +488,52 @@ void SetRows32_NEON(uint8* dst, uint32 v32, int width, void SetRow8_C(uint8* dst, uint32 v32, int count); void SetRows32_C(uint8* dst, uint32 v32, int width, int dst_stride, int height); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); - -void ARGBToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix); void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); - +void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix); +void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, + int pix); void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix); void ABGRToARGBRow_NEON(const uint8* src_abgr, uint8* dst_argb, int pix); void RGBAToARGBRow_NEON(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); - +void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix); +void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, + int pix); void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix); void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix); void RGBAToARGBRow_C(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* 
dst_argb, int pix); -void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix); void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); +void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, + int pix); +void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, + int pix); +void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix); +void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, + int pix); +void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, + int pix); +void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, + int pix); + + void ARGBToRGBARow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -480,301 +563,301 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix); -void I444ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_C(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_C(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_C(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToRGB565Row_C(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_C(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_C(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_C(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_C(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_C(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_C(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_C(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_C(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_C(const uint8* 
y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToRGB24Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb24_buf, +void I422ToRGB24Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, int width); -void I422ToRAWRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* raw_buf, +void I422ToRAWRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, int width); -void I422ToARGB4444Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb565, +void I422ToARGB4444Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, int width); -void I422ToARGB1555Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb565, +void I422ToARGB1555Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb4444, int width); -void I422ToRGB565Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB565Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_rgb565, int width); -void YToARGBRow_C(const uint8* y_buf, - uint8* rgb_buf, +void YToARGBRow_C(const uint8* src_y, + uint8* dst_argb, int width); -void I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_SSSE3(const uint8* 
src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToARGB4444Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB4444Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGB1555Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB1555Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); // RGB24/RAW are unaligned. -void I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgb24, int width); -void I422ToRAWRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_raw, int width); -void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_Unaligned_SSSE3(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, - const 
uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I444ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I444ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* argb_buf, +void I422ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void NV12ToRGB565Row_Any_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, +void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, + const uint8* src_vu, + uint8* dst_argb, int width); -void YUY2ToARGBRow_Any_SSSE3(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_Any_SSSE3(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToBGRARow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* bgra_buf, +void I422ToBGRARow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_bgra, int width); -void I422ToABGRRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* abgr_buf, +void I422ToABGRRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_abgr, int width); -void I422ToRGBARow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToRGBARow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToARGB4444Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToARGB1555Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgba_buf, +void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); -void I422ToRGB565Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, 
- uint8* rgba_buf, +void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_rgba, int width); // RGB24/RAW are unaligned. -void I422ToRGB24Row_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRAWRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_Any_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void YToARGBRow_SSE2(const uint8* y_buf, - uint8* argb_buf, +void YToARGBRow_SSE2(const uint8* src_y, + uint8* dst_argb, int width); -void YToARGBRow_NEON(const uint8* y_buf, - uint8* argb_buf, +void YToARGBRow_NEON(const uint8* src_y, + uint8* dst_argb, int width); // ARGB preattenuated alpha blend. -void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, +void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, +void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, +void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width); void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -789,126 +872,112 @@ void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ABGRToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void RGBAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); - -void I444ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I444ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I411ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I411ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + 
uint8* dst_argb, int width); -void I422ToABGRRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGBARow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGBARow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGB24Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB24Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRAWRow_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRAWRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGB4444Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB4444Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGB1555Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGB1555Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToRGB565Row_Any_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToRGB565Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void NV12ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToARGBRow_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV21ToARGBRow_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV12ToRGB565Row_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV12ToRGB565Row_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void NV21ToRGB565Row_Any_NEON(const uint8* y_buf, - const uint8* uv_buf, - uint8* argb_buf, +void NV21ToRGB565Row_Any_NEON(const uint8* src_y, + const uint8* src_uv, + uint8* dst_argb, int width); -void YUY2ToARGBRow_Any_NEON(const uint8* yuy2_buf, - uint8* argb_buf, +void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, + uint8* dst_argb, int width); -void UYVYToARGBRow_Any_NEON(const uint8* uyvy_buf, - uint8* argb_buf, +void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, + uint8* dst_argb, int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, 
- const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, +void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, int width); void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); @@ -976,6 +1045,62 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); +void HalfRow_C(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); +void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); +void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix); + +void ARGBToBayerRow_C(const uint8* src_argb, + uint8* dst_bayer, uint32 selector, int pix); +void ARGBToBayerRow_SSSE3(const uint8* src_argb, + uint8* dst_bayer, uint32 selector, int pix); +void ARGBToBayerRow_NEON(const uint8* src_argb, + uint8* dst_bayer, uint32 selector, int pix); + +void I422ToYUY2Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_Any_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_Any_SSE2(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); +void I422ToYUY2Row_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_yuy2, int width); +void I422ToUYVYRow_Any_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_uyvy, int width); + +// Effects related row functions. 
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); @@ -1026,68 +1151,13 @@ LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); -void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, +void ARGBInterpolateRow_C(uint8* dst_argb, const uint8* src_argb, + ptrdiff_t src_stride_argb, int dst_width, int source_y_fraction); -void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, +void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, + ptrdiff_t src_stride_argb, int dst_width, int source_y_fraction); -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); - -void ARGBToBayerRow_C(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); -void ARGBToBayerRow_SSSE3(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); -void ARGBToBayerRow_NEON(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width); - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index 18098798b..7ced534b5 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -66,8 +66,8 @@ int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, // Legacy API. Deprecated. LIBYUV_API -int ScaleOffset(const uint8* src, int src_width, int src_height, - uint8* dst, int dst_width, int dst_height, int dst_yoffset, +int ScaleOffset(const uint8* src_i420, int src_width, int src_height, + uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset, bool interpolate); // For testing, allow disabling of specialized scalers. 
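
Illustrative usage sketch (not part of the patch): the ARGBToNV12 and ARGBToYUY2 entry points declared in convert_from_argb.h above could be called as below. The wrapper names, the assumption of a packed ARGB buffer with no row padding, and the even frame size are hypothetical; strides follow the convention used throughout these headers (bytes per row, so ARGB is width * 4 and YUY2 is width * 2).

#include <stdint.h>
#include "libyuv/convert_from_argb.h"

// Hypothetical helper: convert a packed, unpadded ARGB frame to NV12.
// Assumes width and height are even; dst_y is width x height bytes and
// dst_uv is width x (height / 2) bytes of interleaved UV.
// Returns 0 on success, -1 on bad arguments.
int ArgbFrameToNV12(const uint8_t* argb, int width, int height,
                    uint8_t* dst_y, uint8_t* dst_uv) {
  return libyuv::ARGBToNV12(argb, width * 4,   // ARGB stride in bytes.
                            dst_y, width,      // Y plane, 1 byte per pixel.
                            dst_uv, width,     // UV plane, half height.
                            width, height);
}

// Hypothetical helper: same frame to packed YUY2 (2 bytes per pixel).
int ArgbFrameToYUY2(const uint8_t* argb, int width, int height,
                    uint8_t* dst_yuy2) {
  return libyuv::ARGBToYUY2(argb, width * 4, dst_yuy2, width * 2,
                            width, height);
}
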
diff --git a/include/libyuv/version.h b/include/libyuv/version.h index f4dfeef6e..d485c0f21 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 470 +#define LIBYUV_VERSION 471 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index 02e56ecbc..5efefc3a3 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -533,11 +533,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = YUY2ToYRow_C; #if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; @@ -550,12 +548,10 @@ int Q420ToI420(const uint8* src_y, int src_stride_y, } } #elif defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width > 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + YUY2ToYRow = YUY2ToYRow_Any_NEON; + if (width >= 16) { + YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; } if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; @@ -656,11 +652,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, YUY2ToYRow = YUY2ToYRow_C; YUY2ToUVRow = YUY2ToUVRow_C; #if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2; YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; @@ -673,12 +667,10 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, } } #elif defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width > 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + YUY2ToYRow = YUY2ToYRow_Any_NEON; + if (width >= 16) { + YUY2ToUVRow = YUY2ToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; @@ -723,11 +715,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, UYVYToYRow = UYVYToYRow_C; UYVYToUVRow = UYVYToUVRow_C; #if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_SSE2; + UYVYToYRow = UYVYToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2; UYVYToYRow = UYVYToYRow_Unaligned_SSE2; @@ -740,12 +730,10 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, } } #elif defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { UYVYToYRow = 
UYVYToYRow_NEON; @@ -827,10 +815,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 2 * 2 > kMaxStride) { // 2 rows of UYVY are required. - return -1; - } else if (!src_v210 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_v210 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 2 * 2 > kMaxStride) { return -1; } // Negative height means invert the image. @@ -858,12 +845,10 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, } } #elif defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_NEON; @@ -873,11 +858,9 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, #endif #if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_SSE2; + UYVYToYRow = UYVYToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_Unaligned_SSE2; UYVYToUVRow = UYVYToUVRow_SSE2; @@ -887,12 +870,10 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, } } #elif defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width > 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width >= 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; } if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_NEON; @@ -920,6 +901,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210, return 0; } +// Convert ARGB to I420. LIBYUV_API int ARGBToI420(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, @@ -942,11 +924,9 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; @@ -959,10 +939,8 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, } } #elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } @@ -985,6 +963,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, return 0; } +// Convert BGRA to I420. 
LIBYUV_API int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, uint8* dst_y, int dst_stride_y, @@ -1002,18 +981,14 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } - void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix); void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); - - BGRAToYRow = BGRAToYRow_C; - BGRAToUVRow = BGRAToUVRow_C; + uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C; + void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) = + BGRAToYRow_C; #if defined(HAS_BGRATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - BGRAToYRow = BGRAToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; + BGRAToYRow = BGRAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3; BGRAToYRow = BGRAToYRow_Unaligned_SSSE3; @@ -1025,6 +1000,13 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, } } } +#elif defined(HAS_BGRATOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + BGRAToYRow = BGRAToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + BGRAToYRow = BGRAToYRow_NEON; + } + } #endif for (int y = 0; y < height - 1; y += 2) { @@ -1043,6 +1025,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, return 0; } +// Convert ABGR to I420. LIBYUV_API int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, uint8* dst_y, int dst_stride_y, @@ -1060,18 +1043,14 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } - void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix); void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); - - ABGRToYRow = ABGRToYRow_C; - ABGRToUVRow = ABGRToUVRow_C; + uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) = + ABGRToYRow_C; #if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - ABGRToYRow = ABGRToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + ABGRToYRow = ABGRToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3; ABGRToYRow = ABGRToYRow_Unaligned_SSSE3; @@ -1083,6 +1062,13 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, } } } +#elif defined(HAS_ABGRTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ABGRToYRow = ABGRToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ABGRToYRow = ABGRToYRow_NEON; + } + } #endif for (int y = 0; y < height - 1; y += 2) { @@ -1101,6 +1087,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, return 0; } +// Convert RGBA to I420. 
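For these 4:2:0 converters only the Y row gains a NEON kernel in this patch (BGRAToYRow_NEON above, and likewise ABGR and RGBA below); the UV rows keep their C or SSSE3 paths. The surrounding loop is the same everywhere: two source rows per iteration feed one shared chroma row, and an odd final row is averaged with itself by passing a stride of 0. A compilable sketch of that loop shape follows; ToYRow, ToUVRow and ArgbLikeToI420Sketch are local stand-ins whose channel picks are placeholders, not the real colour math.

#include <stdint.h>

// Placeholder row kernels: the channel picks below are NOT the real colour
// math, they only keep the example compilable and short.
static void ToYRow(const uint8_t* src_argb, uint8_t* dst_y, int width) {
  for (int x = 0; x < width; ++x) dst_y[x] = src_argb[x * 4 + 1];
}
static void ToUVRow(const uint8_t* src0, int src_stride,
                    uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* src1 = src0 + src_stride;  // second row (or the same row).
  for (int x = 0; x < width - 1; x += 2) {
    dst_u[x >> 1] = (src0[x * 4 + 0] + src0[x * 4 + 4] +
                     src1[x * 4 + 0] + src1[x * 4 + 4]) >> 2;
    dst_v[x >> 1] = (src0[x * 4 + 2] + src0[x * 4 + 6] +
                     src1[x * 4 + 2] + src1[x * 4 + 6]) >> 2;
  }
}

void ArgbLikeToI420Sketch(const uint8_t* src, int src_stride,
                          uint8_t* dst_y, int dst_stride_y,
                          uint8_t* dst_u, int dst_stride_u,
                          uint8_t* dst_v, int dst_stride_v,
                          int width, int height) {
  for (int y = 0; y < height - 1; y += 2) {
    ToUVRow(src, src_stride, dst_u, dst_v, width);  // one UV row per row pair.
    ToYRow(src, dst_y, width);
    ToYRow(src + src_stride, dst_y + dst_stride_y, width);
    src += src_stride * 2;
    dst_y += dst_stride_y * 2;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  if (height & 1) {            // odd height: stride 0 averages the row with itself.
    ToUVRow(src, 0, dst_u, dst_v, width);
    ToYRow(src, dst_y, width);
  }
}

ABGRToI420 and RGBAToI420 below follow the same shape.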
LIBYUV_API int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, uint8* dst_y, int dst_stride_y, @@ -1118,18 +1105,14 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } - void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix); void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); - - RGBAToYRow = RGBAToYRow_C; - RGBAToUVRow = RGBAToUVRow_C; + uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C; + void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) = + RGBAToYRow_C; #if defined(HAS_RGBATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; - RGBAToYRow = RGBAToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + RGBAToUVRow = RGBAToUVRow_Any_SSSE3; + RGBAToYRow = RGBAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3; RGBAToYRow = RGBAToYRow_Unaligned_SSSE3; @@ -1141,6 +1124,13 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, } } } +#elif defined(HAS_RGBATOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGBAToYRow = RGBAToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGBAToYRow = RGBAToYRow_NEON; + } + } #endif for (int y = 0; y < height - 1; y += 2) { @@ -1159,18 +1149,17 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, return 0; } +// Convert RGB24 to I420. LIBYUV_API int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_rgb24 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. 
if (height < 0) { @@ -1179,44 +1168,71 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, src_stride_rgb24 = -src_stride_rgb24; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - RGB24ToARGBRow = RGB24ToARGBRow_C; + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + RGB24ToARGBRow_C; #if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - TestReadSafe(src_rgb24, src_stride_rgb24, width, height, 3, 48)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#elif defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_RGB24TOYROW_NEON) + void (*RGB24ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + RGB24ToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB24ToYRow = RGB24ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToYRow = RGB24ToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_RGB24TOYROW_NEON for (int y = 0; y < height - 1; y += 2) { RGB24ToARGBRow(src_rgb24, row, width); RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_RGB24TOYROW_NEON) + RGB24ToYRow(src_rgb24, dst_y, width); + RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -1225,23 +1241,27 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, if (height & 1) { RGB24ToARGBRow_C(src_rgb24, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_RGB24TOYROW_NEON) + RGB24ToYRow(src_rgb24, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } +// Convert RAW to I420. +// Same as RGB24 but RGB vs BGR LIBYUV_API int RAWToI420(const uint8* src_raw, int src_stride_raw, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. 
+ if (!src_raw || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_raw || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. if (height < 0) { @@ -1250,44 +1270,71 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, src_stride_raw = -src_stride_raw; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - RAWToARGBRow = RAWToARGBRow_C; + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - TestReadSafe(src_raw, src_stride_raw, width, height, 3, 48)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } + } +#elif defined(HAS_RAWTOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RAWToARGBRow = RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_RAWTOYROW_NEON) + void (*RAWToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + RAWToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RAWToYRow = RAWToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToYRow = RAWToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_RAWTOYROW_NEON for (int y = 0; y < height - 1; y += 2) { RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw + src_stride_raw, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_RAWTOYROW_NEON) + RAWToYRow(src_raw, dst_y, width); + RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -1296,22 +1343,25 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, if (height & 1) { RAWToARGBRow_C(src_raw, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_RAWTOYROW_NEON) + RAWToYRow(src_raw, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } +// Convert RGB565 to I420. 
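RGB24ToI420 and RAWToI420 keep the ARGB row buffer (SIMD_ALIGNED row[kMaxStride * 2], hence the width * 4 > kMaxStride guard) for the UV pass, while on NEON the Y pass now reads the packed source directly through the new RGB24ToYRow_NEON / RAWToYRow_NEON kernels. The sketch below shows the buffered path for the Y plane only; kMaxWidth, Rgb24ToArgbRow, ArgbToYRow and the luma constants are illustrative stand-ins, not the libyuv definitions.

#include <stdint.h>

enum { kMaxWidth = 1024 };  // stand-in for kMaxStride / 4 in the real code.

// Expand one packed RGB24 row (B, G, R in memory) to ARGB (B, G, R, A).
static void Rgb24ToArgbRow(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) {
    dst[0] = src[0];   // B
    dst[1] = src[1];   // G
    dst[2] = src[2];   // R
    dst[3] = 255;      // A
    src += 3;
    dst += 4;
  }
}

// BT.601-style integer luma; constants are illustrative, the real kernels
// live in row_common.cc / row_neon.cc.
static void ArgbToYRow(const uint8_t* src, uint8_t* dst_y, int width) {
  for (int x = 0; x < width; ++x, src += 4) {
    dst_y[x] = (uint8_t)((66 * src[2] + 129 * src[1] + 25 * src[0] + 0x1080) >> 8);
  }
}

void Rgb24ToYPlaneSketch(const uint8_t* src_rgb24, int src_stride,
                         uint8_t* dst_y, int dst_stride_y,
                         int width, int height) {
  if (width > kMaxWidth) return;  // mirrors the width * 4 > kMaxStride check.
  uint8_t row[kMaxWidth * 4];     // one expanded ARGB row on the stack.
  for (int y = 0; y < height; ++y) {
    Rgb24ToArgbRow(src_rgb24, row, width);
    ArgbToYRow(row, dst_y, width);
    src_rgb24 += src_stride;
    dst_y += dst_stride_y;
  }
}

RGB565ToI420 below repeats the pattern, using RGB565ToARGBRow for the buffered UV pass and RGB565ToYRow_NEON for the direct Y path.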
LIBYUV_API int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. - return -1; - } else if (!src_rgb565 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_rgb565 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; } // Negative height means invert the image. @@ -1321,44 +1371,71 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, src_stride_rgb565 = -src_stride_rgb565; } SIMD_ALIGNED(uint8 row[kMaxStride * 2]); - void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); - - RGB565ToARGBRow = RGB565ToARGBRow_C; + void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = + RGB565ToARGBRow_C; #if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - TestReadSafe(src_rgb565, src_stride_rgb565, width, height, 2, 16)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } + } +#elif defined(HAS_RGB565TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_NEON; + } } #endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); - - ARGBToYRow = ARGBToYRow_C; - ARGBToUVRow = ARGBToUVRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif + +#if defined(HAS_RGB565TOYROW_NEON) + void (*RGB565ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + RGB565ToYRow_C; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB565ToYRow = RGB565ToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB565ToYRow = RGB565ToYRow_NEON; + } + } +#else + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif +#endif // HAS_ARGBTOUVROW_SSSE3 +#endif // HAS_RGB565TOYROW_NEON for (int y = 0; y < height - 1; y += 2) { RGB565ToARGBRow(src_rgb565, row, width); RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kMaxStride, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); +#if defined(HAS_RGB565TOYROW_NEON) + RGB565ToYRow(src_rgb565, dst_y, width); + RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); +#else ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); +#endif src_rgb565 += src_stride_rgb565 * 2; dst_y 
+= dst_stride_y * 2; dst_u += dst_stride_u; @@ -1367,7 +1444,11 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, if (height & 1) { RGB565ToARGBRow_C(src_rgb565, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); +#if defined(HAS_RGB565TOYROW_NEON) + RGB565ToYRow(src_rgb565, dst_y, width); +#else ARGBToYRow(row, dst_y, width); +#endif } return 0; } @@ -1378,12 +1459,10 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. + if (!src_argb1555 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_argb1555 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. if (height < 0) { @@ -1409,10 +1488,8 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ARGBToYRow = ARGBToYRow_C; ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; @@ -1450,12 +1527,10 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (width * 4 > kMaxStride) { // Row buffer is required. + if (!src_argb4444 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0 || + width * 4 > kMaxStride) { return -1; - } else if (!src_argb4444 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; } // Negative height means invert the image. if (height < 0) { @@ -1481,10 +1556,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, ARGBToYRow = ARGBToYRow_C; ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 7d6c26205..2c0d2dd4f 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -405,8 +405,8 @@ int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba, // Convert RGB24 to ARGB. 
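In convert_argb.cc the packed-RGB to ARGB converters stop requiring IS_ALIGNED(width, N) up front and instead accept any width >= N through an _Any_ wrapper. Those wrappers, generated by the RGBANY/YANY macros later in this patch, either mask off the ragged tail and finish it in C or re-run the SIMD kernel over the last, overlapping vector. A sketch of the masking strategy follows; Kernel_SIMD, Kernel_C and Kernel_Any are stand-in names.

#include <stdint.h>

// Placeholder kernels; in libyuv these are the *_SSSE3 / *_NEON bodies and
// their *_C counterparts.
static void Kernel_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = src[x];
}
static void Kernel_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = src[x];
}

// The _Any_ idea: whole 16-pixel vectors via SIMD, the 0..15 leftover pixels
// via C. Source and destination bytes per pixel are both 1 here for brevity.
void Kernel_Any(const uint8_t* src, uint8_t* dst, int width) {
  const int kMask = 15;
  int n = width & ~kMask;
  Kernel_SIMD(src, dst, n);
  Kernel_C(src + n, dst + n, width & kMask);
}

The RGB24ToARGB, RAWToARGB and RGB565ToARGB hunks below all switch to this gate.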
LIBYUV_API int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { + uint8* dst_argb, int dst_stride_argb, + int width, int height) { if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { return -1; @@ -417,16 +417,22 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } - void (*RGB24ToARGBRow)(const uint8* src_rgb24, uint8* dst_argb, int pix) = + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = RGB24ToARGBRow_C; #if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } } #elif defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } } #endif @@ -441,8 +447,8 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, // Convert RAW to ARGB. LIBYUV_API int RAWToARGB(const uint8* src_raw, int src_stride_raw, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { + uint8* dst_argb, int dst_stride_argb, + int width, int height) { if (!src_raw || !dst_argb || width <= 0 || height == 0) { return -1; @@ -453,16 +459,22 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } - void (*RAWToARGBRow)(const uint8* src_raw, uint8* dst_argb, int pix) = + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } } #elif defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RAWToARGBRow = RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; + } } #endif @@ -492,10 +504,19 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) = RGB565ToARGBRow_C; #if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 8) && + if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } + } +#elif defined(HAS_RGB565TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_NEON; + } } #endif diff --git a/source/convert_from.cc b/source/convert_from.cc index 86d3c387f..ff1fb315f 100644 
--- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -265,25 +265,25 @@ LIBYUV_API int I422ToYUY2(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_yuy2, int dst_stride_yuy2, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; } void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; @@ -299,11 +299,11 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height; ++y) { - I422ToYUY2Row(src_y, src_u, src_y, dst_frame, width); + I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame; + dst_yuy2 += dst_stride_yuy2; } return 0; } @@ -312,25 +312,25 @@ LIBYUV_API int I420ToYUY2(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_yuy2, int dst_stride_yuy2, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; } void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; @@ -346,16 +346,16 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height - 1; y += 2) { - I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width); + I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); I422ToYUY2Row(src_y + src_stride_y, src_u, src_v, - dst_frame + dst_stride_frame, width); + dst_yuy2 + dst_stride_yuy2, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; + dst_yuy2 += dst_stride_yuy2 * 2; } if (height & 1) { - I422ToYUY2Row(src_y, src_u, src_v, dst_frame, width); + I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); } return 0; } @@ -365,25 +365,25 @@ LIBYUV_API int I422ToUYVY(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_uyvy, int dst_stride_uyvy, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; + dst_stride_uyvy = -dst_stride_uyvy; } void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; @@ -399,11 +399,11 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height; ++y) { - I422ToUYVYRow(src_y, src_u, src_y, dst_frame, width); + I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame; + dst_uyvy += dst_stride_uyvy; } return 0; } @@ -412,25 +412,25 @@ LIBYUV_API int I420ToUYVY(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_uyvy, int dst_stride_uyvy, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_frame || + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; + dst_stride_uyvy = -dst_stride_uyvy; } void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width) = + const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; @@ -446,16 +446,16 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, #endif for (int y = 0; y < height - 1; y += 2) { - I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width); + I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); I422ToUYVYRow(src_y + src_stride_y, src_u, src_v, - dst_frame + dst_stride_frame, width); + dst_uyvy + dst_stride_uyvy, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; + dst_uyvy += dst_stride_uyvy * 2; } if (height & 1) { - I422ToUYVYRow(src_y, src_u, src_v, dst_frame, width); + I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); } return 0; } @@ -464,35 +464,35 @@ LIBYUV_API int I420ToV210(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, + uint8* dst_v210, int dst_stride_v210, int width, int height) { if (width * 16 / 6 > kMaxStride || - !src_y || !src_u || !src_v || !dst_frame || + !src_y || !src_u || !src_v || !dst_v210 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_frame = dst_frame + (height - 1) * dst_stride_frame; - dst_stride_frame = -dst_stride_frame; + dst_v210 = dst_v210 + (height - 1) * dst_stride_v210; + dst_stride_v210 = -dst_stride_v210; } SIMD_ALIGNED(uint8 row[kMaxStride]); for (int y = 0; y < height - 1; y += 2) { I422ToUYVYRow_C(src_y, src_u, src_v, row, width); - UYVYToV210Row_C(row, dst_frame, width); + UYVYToV210Row_C(row, dst_v210, width); I422ToUYVYRow_C(src_y + src_stride_y, src_u, src_v, row, width); - UYVYToV210Row_C(row, dst_frame + dst_stride_frame, width); + UYVYToV210Row_C(row, dst_v210 + dst_stride_v210, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; - dst_frame += dst_stride_frame * 2; + dst_v210 += dst_stride_v210 * 2; } if (height & 1) { I422ToUYVYRow_C(src_y, src_u, src_v, row, width); - UYVYToV210Row_C(row, dst_frame, width); + UYVYToV210Row_C(row, dst_v210, width); } return 0; } @@ -521,7 +521,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, int halfwidth = (width + 1) >> 1; void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width) = MergeUV_C; -#if defined(HAS_SPLITUV_SSE2) +#if defined(HAS_MERGEUV_SSE2) if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { MergeUV = MergeUV_Any_SSE2; if (IS_ALIGNED(halfwidth, 16)) { @@ -534,7 +534,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_SPLITUV_AVX2) +#if defined(HAS_MERGEUV_AVX2) if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { MergeUV = MergeUV_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { @@ -547,7 +547,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_SPLITUV_NEON) +#if defined(HAS_MERGEUV_NEON) if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { MergeUV = MergeUV_Any_NEON; if (IS_ALIGNED(halfwidth, 16)) { diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index cd7186592..9dedde006 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -21,6 +21,522 @@ namespace libyuv { extern "C" { #endif +// ARGB little endian (bgra in memory) to I444 +LIBYUV_API +int ARGBToI444(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToUV444Row_C(src_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + return 0; +} + +// ARGB little endian (bgra in memory) to I422 +LIBYUV_API +int ARGBToI422(const uint8* src_argb, 
int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, + int pix) = ARGBToUV422Row_C; +#if defined(HAS_ARGBTOUV422ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToUV422Row = ARGBToUV422Row_SSSE3; + } + } + } +#endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToUV422Row(src_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + return 0; +} + +// ARGB little endian (bgra in memory) to I411 +LIBYUV_API +int ARGBToI411(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + ARGBToUV411Row_C(src_argb, dst_u, dst_v, width); + ARGBToYRow(src_argb, dst_y, width); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + return 0; +} + +LIBYUV_API +int ARGBToNV12(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_uv, int dst_stride_uv, + int width, int height) { + if (!src_argb || + !dst_y || !dst_uv || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + int halfwidth = (width + 1) >> 1; + void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, + int width) = MergeUV_C; +#if defined(HAS_MERGEUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { + MergeUV = MergeUV_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_SSE2; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_SSE2; + } + } + } +#endif +#if defined(HAS_MERGEUV_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { + MergeUV = MergeUV_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUV = MergeUV_Unaligned_AVX2; + if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { + MergeUV = MergeUV_AVX2; + } + } + } +#endif +#if defined(HAS_MERGEUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { + MergeUV = MergeUV_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_NEON; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_NEON; + } + } + } +#endif + + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height - 1; y += 2) { + ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); + MergeUV(row_u, row_v, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + MergeUV(row_u, row_v, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width); + } + return 0; +} + +// Same as NV12 but U and V swapped. +LIBYUV_API +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_uv, int dst_stride_uv, + int width, int height) { + if (!src_argb || + !dst_y || !dst_uv || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + int halfwidth = (width + 1) >> 1; + void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, + int width) = MergeUV_C; +#if defined(HAS_MERGEUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { + MergeUV = MergeUV_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_SSE2; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_SSE2; + } + } + } +#endif +#if defined(HAS_MERGEUV_AVX2) + if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { + MergeUV = MergeUV_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUV = MergeUV_Unaligned_AVX2; + if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { + MergeUV = MergeUV_AVX2; + } + } + } +#endif +#if defined(HAS_MERGEUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { + MergeUV = MergeUV_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUV = MergeUV_Unaligned_NEON; + if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { + MergeUV = MergeUV_NEON; + } + } + } +#endif + + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height - 1; y += 2) { + ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); + MergeUV(row_v, row_u, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + MergeUV(row_v, row_u, dst_uv, halfwidth); + ARGBToYRow(src_argb, dst_y, width); + ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width); + } + return 0; +} + +// Convert ARGB to YUY2. +LIBYUV_API +int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, + uint8* dst_yuy2, int dst_stride_yuy2, + int width, int height) { + if (!src_argb || !dst_yuy2 || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; + } + + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_yuy2, int width) = + I422ToYUY2Row_C; +#if defined(HAS_I422TOYUY2ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && + IS_ALIGNED(dst_yuy2, 16) && IS_ALIGNED(dst_stride_yuy2, 16)) { + I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_SSE2; + } + } +#elif defined(HAS_I422TOYUY2ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + I422ToYUY2Row = I422ToYUY2Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_NEON; + } + } +#endif + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height; ++y) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + ARGBToYRow(src_argb, row_y, width); + I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); + src_argb += src_stride_argb; + dst_yuy2 += dst_stride_yuy2; + } + return 0; +} + +// Convert ARGB to UYVY. +LIBYUV_API +int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, + uint8* dst_uyvy, int dst_stride_uyvy, + int width, int height) { + if (!src_argb || !dst_uyvy || + width <= 0 || height == 0 || + width > kMaxStride) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; + dst_stride_uyvy = -dst_stride_uyvy; + } + + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; + ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + ARGBToYRow = ARGBToYRow_SSSE3; + } + } + } +#elif defined(HAS_ARGBTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToYRow = ARGBToYRow_NEON; + } + } +#endif + + void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, + const uint8* src_v, uint8* dst_uyvy, int width) = + I422ToUYVYRow_C; +#if defined(HAS_I422TOUYVYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && width >= 16 && + IS_ALIGNED(dst_uyvy, 16) && IS_ALIGNED(dst_stride_uyvy, 16)) { + I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_SSE2; + } + } +#elif defined(HAS_I422TOUYVYROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + I422ToUYVYRow = I422ToUYVYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_NEON; + } + } +#endif + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + + for (int y = 0; y < height; ++y) { + ARGBToUVRow(src_argb, 0, row_u, row_v, width); + ARGBToYRow(src_argb, row_y, width); + I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); + src_argb += src_stride_argb; + dst_uyvy += dst_stride_uyvy; + } + return 0; +} + // Convert ARGB to I400. 
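ARGBToYUY2 and ARGBToUYVY above build one row of planar Y, U and V in stack buffers and then hand them to the I422To*Row packers; the only difference between the two outputs is the byte order within each pixel pair. A sketch of that final packing step follows (odd-width tails omitted; PackYuy2Sketch and PackUyvySketch are stand-in names, with uint8_t standing in for libyuv's uint8).

#include <stdint.h>

// YUY2: Y0 U Y1 V per pixel pair.  UYVY: U Y0 V Y1.
void PackYuy2Sketch(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                    uint8_t* dst, int width) {
  for (int x = 0; x < width - 1; x += 2) {
    dst[0] = y[0];
    dst[1] = u[0];
    dst[2] = y[1];
    dst[3] = v[0];
    y += 2; u += 1; v += 1; dst += 4;
  }
}

void PackUyvySketch(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                    uint8_t* dst, int width) {
  for (int x = 0; x < width - 1; x += 2) {
    dst[0] = u[0];
    dst[1] = y[0];
    dst[2] = v[0];
    dst[3] = y[1];
    y += 2; u += 1; v += 1; dst += 4;
  }
}

ARGBToI400 below needs only the Y pass, so its change is just the simplified kernel dispatch.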
LIBYUV_API int ARGBToI400(const uint8* src_argb, int src_stride_argb, @@ -37,10 +553,8 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && @@ -50,10 +564,8 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, } } #elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } @@ -68,64 +580,6 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, return 0; } -// ARGB little endian (bgra in memory) to I422 -// same as I420 except UV plane is full height -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} // Convert ARGB to RGBA. 
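The ARGBToI422 removed here reused the 4:2:0 ARGBToUVRow with a stride of 0; its replacement, added earlier in this patch, keeps the chroma plane at full height and subsamples horizontally only, through the dedicated ARGBToUV422Row. A sketch of that averaging follows; the V constants match the RGBToV helper visible later in this patch, while the U constants are the usual mirrored set and should be read as illustrative.

#include <stdint.h>

// Horizontal-only 4:2:2 chroma subsampling from ARGB (B, G, R, A in memory).
void ArgbToUv422Sketch(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v,
                       int width) {
  for (int x = 0; x < width - 1; x += 2) {
    int b = (src_argb[0] + src_argb[4]) >> 1;  // average two neighbouring pixels;
    int g = (src_argb[1] + src_argb[5]) >> 1;  // no vertical averaging, so the
    int r = (src_argb[2] + src_argb[6]) >> 1;  // chroma plane stays full height.
    dst_u[0] = (uint8_t)(((112 * b - 74 * g - 38 * r + 128) >> 8) + 128);
    dst_v[0] = (uint8_t)(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128);
    src_argb += 8;
    dst_u += 1;
    dst_v += 1;
  }
}

The remaining hunks below apply the same dispatch cleanup to format_conversion.cc and rename the MirrorRowUV kernels to MirrorUVRow.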
LIBYUV_API int ARGBToRGBA(const uint8* src_argb, int src_stride_argb, diff --git a/source/format_conversion.cc b/source/format_conversion.cc index 95ce4713e..e1ae5c4c4 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -268,7 +268,7 @@ int BayerToARGB(const uint8* src_bayer, int src_stride_bayer, dst_argb += dst_stride_argb * 2; } if (height & 1) { - BayerRow0(src_bayer, -src_stride_bayer, dst_argb, width); + BayerRow0(src_bayer, src_stride_bayer, dst_argb, width); } return 0; } @@ -305,11 +305,9 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = ARGBToYRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - if (width > 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; @@ -319,10 +317,8 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, } } #elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - if (width > 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - } + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b5d4ffa57..7225c06ac 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -617,7 +617,7 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, dst_stride_rgb565 = -dst_stride_rgb565; } void (*NV21ToRGB565Row)(const uint8* y_buf, - const uint8* vu_buf, + const uint8* src_vu, uint8* rgb_buf, int width) = NV21ToRGB565Row_C; #if defined(HAS_NV21TORGB565ROW_SSSE3) diff --git a/source/rotate.cc b/source/rotate.cc index 0601dec07..2d312f886 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -45,7 +45,7 @@ extern "C" { #define HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width); #define HAS_MIRRORROW_UV_NEON -void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); +void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); #define HAS_TRANSPOSE_WX8_NEON void TransposeWx8_NEON(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width); @@ -1049,21 +1049,21 @@ void RotateUV180(const uint8* src, int src_stride, uint8* dst_b, int dst_stride_b, int width, int height) { void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = - MirrorRowUV_C; + MirrorUVRow_C; #if defined(HAS_MIRRORROW_UV_NEON) if (TestCpuFlag(kCpuHasNEON)) { - MirrorRowUV = MirrorRowUV_NEON; + MirrorRowUV = MirrorUVRow_NEON; } #elif defined(HAS_MIRRORROW_UV_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - MirrorRowUV = MirrorRowUV_SSSE3; + MirrorRowUV = MirrorUVRow_SSSE3; } -#elif defined(HAS_MIRRORROWUV_MIPS_DSPR2) +#elif defined(HAS_MirrorUVRow_MIPS_DSPR2) if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - MirrorRowUV = MirrorRowUV_MIPS_DSPR2; + MirrorRowUV = MirrorUVRow_MIPS_DSPR2; } #endif diff --git a/source/row_any.cc b/source/row_any.cc index 1efc5572f..4dea57ff7 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -121,12 +121,12 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 
NV21ToRGB565Row_C, 0, 2) // NEON RGB24 is multiple of 8 pixels, unaligned source and destination. // I400 To ARGB does multiple of 8 pixels with SIMD and remainder with C. #define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \ - void NAMEANY(const uint8* argb_buf, \ - uint8* rgb_buf, \ + void NAMEANY(const uint8* src, \ + uint8* dst, \ int width) { \ int n = width & ~MASK; \ - ARGBTORGB_SIMD(argb_buf, rgb_buf, n); \ - ARGBTORGB_C(argb_buf + n * SBPP, rgb_buf + n * BPP, width & MASK); \ + ARGBTORGB_SIMD(src, dst, n); \ + ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \ } #if defined(HAS_ARGBTORGB24ROW_SSSE3) @@ -167,30 +167,37 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, // RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD. // TODO(fbarchard): Use last 16 method for all unsubsampled conversions. -#define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \ +#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \ void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \ - ARGBTOY_SIMD(src_argb + (width - NUM) * BPP, dst_y + (width - NUM), NUM);\ + ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \ + dst_y + (width - NUM) * BPP, NUM); \ } #ifdef HAS_ARGBTOYROW_SSSE3 -YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 16) -YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 16) -YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 16) -#endif -#ifdef HAS_RGBATOYROW_SSSE3 -YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 16) +YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16) +YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16) +YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16) +YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16) +YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16) +YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8) #endif #ifdef HAS_ARGBTOYROW_NEON -YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 8) -#endif -#ifdef HAS_YUY2TOYROW_SSE2 -YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 16) -YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 16) -#endif -#ifdef HAS_YUY2TOYROW_NEON -YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 16) -YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16) +YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8) +YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8) +YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8) +YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8) +YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8) +YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8) +YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8) +YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16) +YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16) +YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8) +YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8) +YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8) #endif #undef YANY @@ -201,17 +208,15 @@ YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16) int n = width & ~15; \ ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \ ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \ - dst_u + (n >> 1), \ - dst_v + (n >> 1), \ - width & 15); \ + dst_u + (n >> 1), \ + dst_v + (n >> 1), \ + width & 15); \ } #ifdef 
HAS_ARGBTOUVROW_SSSE3 UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4) UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4) UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4) -#endif -#ifdef HAS_RGBATOYROW_SSSE3 UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4) #endif #ifdef HAS_YUY2TOUVROW_SSE2 @@ -230,11 +235,15 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2) int n = width & ~15; \ ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \ ANYTOUV_C(src_uv + n * BPP, \ - dst_u + (n >> 1), \ - dst_v + (n >> 1), \ - width & 15); \ + dst_u + (n >> 1), \ + dst_v + (n >> 1), \ + width & 15); \ } +#ifdef HAS_ARGBTOUVROW_SSSE3 +UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3, + ARGBToUV422Row_C, 4) +#endif #ifdef HAS_YUY2TOUV422ROW_SSE2 UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, YUY2ToUV422Row_C, 2) diff --git a/source/row_common.cc b/source/row_common.cc index 5ab935a89..aad97b001 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -95,47 +95,47 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { } } -void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { +void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 b = src_rgb[0] & 0x1f; - uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x07) << 3); - uint8 r = src_rgb[1] >> 3; + uint8 b = src_rgb565[0] & 0x1f; + uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8 r = src_rgb565[1] >> 3; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 2) | (g >> 4); dst_argb[2] = (r << 3) | (r >> 2); dst_argb[3] = 255u; dst_argb += 4; - src_rgb += 2; + src_rgb565 += 2; } } -void ARGB1555ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { +void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 b = src_rgb[0] & 0x1f; - uint8 g = (src_rgb[0] >> 5) | ((src_rgb[1] & 0x03) << 3); - uint8 r = (src_rgb[1] & 0x7c) >> 2; - uint8 a = src_rgb[1] >> 7; + uint8 b = src_argb1555[0] & 0x1f; + uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8 r = (src_argb1555[1] & 0x7c) >> 2; + uint8 a = src_argb1555[1] >> 7; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 3) | (g >> 2); dst_argb[2] = (r << 3) | (r >> 2); dst_argb[3] = -a; dst_argb += 4; - src_rgb += 2; + src_argb1555 += 2; } } -void ARGB4444ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width) { +void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, int width) { for (int x = 0; x < width; ++x) { - uint8 b = src_rgb[0] & 0x0f; - uint8 g = src_rgb[0] >> 4; - uint8 r = src_rgb[1] & 0x0f; - uint8 a = src_rgb[1] >> 4; + uint8 b = src_argb4444[0] & 0x0f; + uint8 g = src_argb4444[0] >> 4; + uint8 r = src_argb4444[1] & 0x0f; + uint8 a = src_argb4444[1] >> 4; dst_argb[0] = (b << 4) | b; dst_argb[1] = (g << 4) | g; dst_argb[2] = (r << 4) | r; dst_argb[3] = (a << 4) | a; dst_argb += 4; - src_rgb += 2; + src_argb4444 += 2; } } @@ -265,11 +265,11 @@ static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { return ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128; } -#define MAKEROWY(NAME, R, G, B) \ +#define MAKEROWY(NAME, R, G, B, BPP) \ void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ for (int x = 0; x < width; ++x) { \ dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += 4; \ + 
src_argb0 += BPP; \ dst_y += 1; \ } \ } \ @@ -277,16 +277,16 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ uint8* dst_u, uint8* dst_v, int width) { \ const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ for (int x = 0; x < width - 1; x += 2) { \ - uint8 ab = (src_rgb0[B] + src_rgb0[B + 4] + \ - src_rgb1[B] + src_rgb1[B + 4]) >> 2; \ - uint8 ag = (src_rgb0[G] + src_rgb0[G + 4] + \ - src_rgb1[G] + src_rgb1[G + 4]) >> 2; \ - uint8 ar = (src_rgb0[R] + src_rgb0[R + 4] + \ - src_rgb1[R] + src_rgb1[R + 4]) >> 2; \ + uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ + src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ + uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ + src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \ + uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ + src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ dst_u[0] = RGBToU(ar, ag, ab); \ dst_v[0] = RGBToV(ar, ag, ab); \ - src_rgb0 += 8; \ - src_rgb1 += 8; \ + src_rgb0 += BPP * 2; \ + src_rgb1 += BPP * 2; \ dst_u += 1; \ dst_v += 1; \ } \ @@ -299,10 +299,95 @@ void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ } \ } -MAKEROWY(ARGB, 2, 1, 0) -MAKEROWY(BGRA, 1, 2, 3) -MAKEROWY(ABGR, 0, 1, 2) -MAKEROWY(RGBA, 3, 2, 1) +MAKEROWY(ARGB, 2, 1, 0, 4) +MAKEROWY(BGRA, 1, 2, 3, 4) +MAKEROWY(ABGR, 0, 1, 2, 4) +MAKEROWY(RGBA, 3, 2, 1, 4) +MAKEROWY(RGB24, 2, 1, 0, 3) +MAKEROWY(RAW, 0, 1, 2, 3) +#undef MAKEROWY + +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { + for (int x = 0; x < width; ++x) { + uint8 b = src_rgb565[0] & 0x1f; + uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8 r = src_rgb565[1] >> 3; + b = (b << 3) | (b >> 2); + g = (g << 2) | (g >> 4); + r = (r << 3) | (r >> 2); + dst_y[0] = RGBToY(r, g, b); + src_rgb565 += 2; + dst_y += 1; + } +} + +void ARGBToUV444Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width) { + for (int x = 0; x < width; ++x) { + uint8 ab = src_argb[0]; + uint8 ag = src_argb[1]; + uint8 ar = src_argb[2]; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + src_argb += 4; + dst_u += 1; + dst_v += 1; + } +} + +void ARGBToUV422Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width) { + for (int x = 0; x < width - 1; x += 2) { + uint8 ab = (src_argb[0] + src_argb[4]) >> 1; + uint8 ag = (src_argb[1] + src_argb[5]) >> 1; + uint8 ar = (src_argb[2] + src_argb[6]) >> 1; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + src_argb += 8; + dst_u += 1; + dst_v += 1; + } + if ((width & 3) == 1) { + uint8 ab = src_argb[0]; + uint8 ag = src_argb[1]; + uint8 ar = src_argb[2]; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } +} + +void ARGBToUV411Row_C(const uint8* src_argb, + uint8* dst_u, uint8* dst_v, int width) { + for (int x = 0; x < width - 3; x += 4) { + uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; + uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; + uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + src_argb += 16; + dst_u += 1; + dst_v += 1; + } + if ((width & 3) == 3) { + uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3; + uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3; + uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } else if ((width & 3) == 2) { + uint8 ab = (src_argb[0] + src_argb[4]) >> 1; + uint8 ag = (src_argb[1] + src_argb[5]) >> 1; + uint8 
ar = (src_argb[2] + src_argb[6]) >> 1; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } else if ((width & 3) == 1) { + uint8 ab = src_argb[0]; + uint8 ag = src_argb[1]; + uint8 ar = src_argb[2]; + dst_u[0] = RGBToU(ar, ag, ab); + dst_v[0] = RGBToV(ar, ag, ab); + } +} // http://en.wikipedia.org/wiki/Grayscale. // 0.11 * B + 0.59 * G + 0.30 * R @@ -470,104 +555,104 @@ static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v, #if defined(__ARM_NEON__) // C mimic assembly. // TODO(fbarchard): Remove subsampling from Neon. -void I444ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I444ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - uint8 u = (u_buf[0] + u_buf[1] + 1) >> 1; - uint8 v = (v_buf[0] + v_buf[1] + 1) >> 1; - YuvPixel(y_buf[0], u, v, rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u, v, rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - u_buf += 2; - v_buf += 2; + uint8 u = (src_u[0] + src_u[1] + 1) >> 1; + uint8 v = (src_v[0] + src_v[1] + 1) >> 1; + YuvPixel(src_y[0], u, v, rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], u, v, rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + src_u += 2; + src_v += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); } } #else -void I444ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I444ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width; ++x) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0); - y_buf += 1; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf, 24, 16, 8, 0); + src_y += 1; + src_u += 1; + src_v += 1; rgb_buf += 4; // Advance 1 pixel. } } #endif // Also used for 420 -void I422ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); } } -void I422ToRGB24Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB24Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], + YuvPixel2(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 6; // Advance 2 pixels. 
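
Note on the MAKEROWY change above: the new BPP argument is what lets the same macro also generate the RGB24 and RAW rows, since those formats are 3 bytes per pixel instead of 4. As a rough guide, MAKEROWY(RGB24, 2, 1, 0, 3) expands to a per-pixel loop along the lines of the sketch below. The sketch is illustrative only; the diff context shows RGBToV but not RGBToY, so the RGBToY constants here are an assumption to verify against row_common.cc.

    typedef unsigned char uint8;

    // Assumed to match libyuv's static inline RGBToY (BT.601 studio range,
    // 8-bit fixed point; the +16 offset and rounding are folded into 0x1080).
    static int RGBToY_sketch(uint8 r, uint8 g, uint8 b) {
      return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
    }

    // Hand expansion of MAKEROWY(RGB24, 2, 1, 0, 3): R at byte 2, G at byte 1,
    // B at byte 0, 3 bytes per pixel.
    void RGB24ToYRow_C_sketch(const uint8* src_rgb24, uint8* dst_y, int width) {
      for (int x = 0; x < width; ++x) {
        dst_y[0] = RGBToY_sketch(src_rgb24[2], src_rgb24[1], src_rgb24[0]);
        src_rgb24 += 3;  // BPP = 3 for RGB24.
        dst_y += 1;
      }
    }
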
} if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); } } -void I422ToRAWRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRAWRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], + YuvPixel2(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 6; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], + YuvPixel2(src_y[0], src_u[0], src_v[0], rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); } } -void I422ToARGB4444Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB4444Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_argb4444, int width) { uint8 b0; @@ -577,8 +662,8 @@ void I422ToARGB4444Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -587,13 +672,13 @@ void I422ToARGB4444Row_C(const uint8* y_buf, r1 = r1 >> 4; *reinterpret_cast(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; dst_argb4444 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; @@ -602,9 +687,9 @@ void I422ToARGB4444Row_C(const uint8* y_buf, } } -void I422ToARGB1555Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB1555Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_argb1555, int width) { uint8 b0; @@ -614,8 +699,8 @@ void I422ToARGB1555Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -624,13 +709,13 @@ void I422ToARGB1555Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; dst_argb1555 += 4; // Advance 2 pixels. 
} if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; @@ -639,9 +724,9 @@ void I422ToARGB1555Row_C(const uint8* y_buf, } } -void I422ToRGB565Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB565Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_rgb565, int width) { uint8 b0; @@ -651,8 +736,8 @@ void I422ToRGB565Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -661,13 +746,13 @@ void I422ToRGB565Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); - y_buf += 2; - u_buf += 1; - v_buf += 1; + src_y += 2; + src_u += 1; + src_v += 1; dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -675,66 +760,66 @@ void I422ToRGB565Row_C(const uint8* y_buf, } } -void I411ToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I411ToARGBRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 3; x += 4) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); - YuvPixel(y_buf[2], u_buf[0], v_buf[0], rgb_buf + 8, 24, 16, 8, 0); - YuvPixel(y_buf[3], u_buf[0], v_buf[0], rgb_buf + 12, 24, 16, 8, 0); - y_buf += 4; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0); + YuvPixel(src_y[2], src_u[0], src_v[0], rgb_buf + 8, 24, 16, 8, 0); + YuvPixel(src_y[3], src_u[0], src_v[0], rgb_buf + 12, 24, 16, 8, 0); + src_y += 4; + src_u += 1; + src_v += 1; rgb_buf += 16; // Advance 4 pixels. } if (width & 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 16, 8, 0); } } -void NV12ToARGBRow_C(const uint8* y_buf, - const uint8* uv_buf, +void NV12ToARGBRow_C(const uint8* src_y, + const uint8* usrc_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], uv_buf[0], uv_buf[1], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - uv_buf += 2; + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], usrc_v[0], usrc_v[1], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + usrc_v += 2; rgb_buf += 8; // Advance 2 pixels. 
} if (width & 1) { - YuvPixel(y_buf[0], uv_buf[0], uv_buf[1], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], rgb_buf + 0, 24, 16, 8, 0); } } -void NV21ToARGBRow_C(const uint8* y_buf, - const uint8* vu_buf, +void NV21ToARGBRow_C(const uint8* src_y, + const uint8* src_vu, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(y_buf[1], vu_buf[1], vu_buf[0], rgb_buf + 4, 24, 16, 8, 0); - y_buf += 2; - vu_buf += 2; + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, 24, 16, 8, 0); + src_y += 2; + src_vu += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], vu_buf[1], vu_buf[0], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, 24, 16, 8, 0); } } -void NV12ToRGB565Row_C(const uint8* y_buf, - const uint8* uv_buf, +void NV12ToRGB565Row_C(const uint8* src_y, + const uint8* usrc_v, uint8* dst_rgb565, int width) { uint8 b0; @@ -744,8 +829,8 @@ void NV12ToRGB565Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0); - YuvPixel2(y_buf[1], uv_buf[0], uv_buf[1], &b1, &g1, &r1); + YuvPixel2(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); + YuvPixel2(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -754,12 +839,12 @@ void NV12ToRGB565Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); - y_buf += 2; - uv_buf += 2; + src_y += 2; + usrc_v += 2; dst_rgb565 += 4; // Advance 2 pixels. } if (width & 1) { - YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0); + YuvPixel2(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -767,8 +852,8 @@ void NV12ToRGB565Row_C(const uint8* y_buf, } } -void NV21ToRGB565Row_C(const uint8* y_buf, - const uint8* vu_buf, +void NV21ToRGB565Row_C(const uint8* src_y, + const uint8* vsrc_u, uint8* dst_rgb565, int width) { uint8 b0; @@ -778,8 +863,8 @@ void NV21ToRGB565Row_C(const uint8* y_buf, uint8 g1; uint8 r1; for (int x = 0; x < width - 1; x += 2) { - YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0); - YuvPixel2(y_buf[1], vu_buf[1], vu_buf[0], &b1, &g1, &r1); + YuvPixel2(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); + YuvPixel2(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -788,12 +873,12 @@ void NV21ToRGB565Row_C(const uint8* y_buf, r1 = r1 >> 3; *reinterpret_cast(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); - y_buf += 2; - vu_buf += 2; + src_y += 2; + vsrc_u += 2; dst_rgb565 += 4; // Advance 2 pixels. 
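
The RGB565 writers above all use the same bit layout: blue in bits 0-4, green in bits 5-10, red in bits 11-15, two pixels packed per 32-bit store. A minimal scalar sketch of the pack and the matching unpack (the unpack mirrors RGB565ToARGBRow_C and the NEON RGB565TOARGB macro elsewhere in this patch; function names here are illustrative):

    #include <stdint.h>

    // Pack one 8-bit B/G/R triple into RGB565, using the same shifts as
    // I422ToRGB565Row_C and NV12ToRGB565Row_C above.
    static uint16_t PackRGB565(uint8_t b, uint8_t g, uint8_t r) {
      return (uint16_t)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
    }

    // Expand RGB565 back to 8 bits per channel by replicating the top bits
    // into the vacated low bits, so 0x1f maps to 0xff rather than 0xf8.
    static void UnpackRGB565(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r) {
      uint8_t b5 = p & 0x1f;
      uint8_t g6 = (p >> 5) & 0x3f;
      uint8_t r5 = (uint8_t)(p >> 11);
      *b = (uint8_t)((b5 << 3) | (b5 >> 2));
      *g = (uint8_t)((g6 << 2) | (g6 >> 4));
      *r = (uint8_t)((r5 << 3) | (r5 >> 2));
    }
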
} if (width & 1) { - YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0); + YuvPixel2(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; @@ -801,92 +886,92 @@ void NV21ToRGB565Row_C(const uint8* y_buf, } } -void YUY2ToARGBRow_C(const uint8* yuy2_buf, +void YUY2ToARGBRow_C(const uint8* src_yuy2, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(yuy2_buf[2], yuy2_buf[1], yuy2_buf[3], rgb_buf + 4, 24, 16, 8, 0); - yuy2_buf += 4; + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, 24, 16, 8, 0); + src_yuy2 += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(yuy2_buf[0], yuy2_buf[1], yuy2_buf[3], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, 24, 16, 8, 0); } } -void UYVYToARGBRow_C(const uint8* uyvy_buf, +void UYVYToARGBRow_C(const uint8* src_uyvy, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0); - YuvPixel(uyvy_buf[3], uyvy_buf[0], uyvy_buf[2], rgb_buf + 4, 24, 16, 8, 0); - uyvy_buf += 4; + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, 24, 16, 8, 0); + src_uyvy += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(uyvy_buf[1], uyvy_buf[0], uyvy_buf[2], rgb_buf + 0, 24, 16, 8, 0); + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, 24, 16, 8, 0); } } -void I422ToBGRARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToBGRARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 8, 16, 24); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 8, 16, 24); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 8, 16, 24); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 0, 8, 16, 24); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 0, 8, 16, 24); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf, 0, 8, 16, 24); } } -void I422ToABGRRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToABGRRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 24, 0, 8, 16); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 0, 8, 16); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 24, 0, 8, 16); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. 
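
The index arithmetic in YUY2ToARGBRow_C and UYVYToARGBRow_C above encodes the packed-422 byte order: a YUY2 macropixel is Y0,U,Y1,V and a UYVY macropixel is U,Y0,V,Y1, with the two luma samples sharing one chroma pair. A small sketch of that unpacking, for reference (helper names are illustrative):

    #include <stdint.h>

    // One 4-byte macropixel covers two horizontal pixels in both formats.
    static void UnpackYUY2(const uint8_t* p,
                           uint8_t* y0, uint8_t* y1, uint8_t* u, uint8_t* v) {
      *y0 = p[0]; *u = p[1]; *y1 = p[2]; *v = p[3];  // Y0 U Y1 V
    }

    static void UnpackUYVY(const uint8_t* p,
                           uint8_t* y0, uint8_t* y1, uint8_t* u, uint8_t* v) {
      *u = p[0]; *y0 = p[1]; *v = p[2]; *y1 = p[3];  // U Y0 V Y1
    }
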
} if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 0, 8, 16); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 24, 0, 8, 16); } } -void I422ToRGBARow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGBARow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { for (int x = 0; x < width - 1; x += 2) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8); - YuvPixel(y_buf[1], u_buf[0], v_buf[0], rgb_buf + 4, 0, 24, 16, 8); - y_buf += 2; - u_buf += 1; - v_buf += 1; + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 24, 16, 8); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, 0, 24, 16, 8); + src_y += 2; + src_u += 1; + src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 0, 24, 16, 8); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, 0, 24, 16, 8); } } -void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) { +void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { for (int x = 0; x < width; ++x) { - YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0); - y_buf += 1; + YuvPixel(src_y[0], 128, 128, rgb_buf, 24, 16, 8, 0); + src_y += 1; rgb_buf += 4; // Advance 1 pixel. } } @@ -903,7 +988,7 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) { } } -void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { src_uv += (width - 1) << 1; for (int x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; @@ -1399,35 +1484,35 @@ void I422ToUYVYRow_C(const uint8* src_y, // row_win.cc has asm version, but GCC uses 2 step wrapper. 5% slower. // TODO(fbarchard): Handle width > kMaxStride here instead of calling code. 
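
The "2 step wrapper" mentioned in the comment above is the pattern used in this block when no direct assembly exists for a destination format: convert one row to ARGB into a SIMD_ALIGNED temporary of kMaxStride bytes, then repack that row. The same idea is presumably why this patch shrinks the chroma temporaries in YUY2ToARGBRow_SSSE3 and UYVYToARGBRow_SSSE3 to kMaxStride / 2: a 422 row carries only half-width U and V. A rough sketch of the shape, using row functions this patch already calls (the buffer size and alignment attribute are stand-ins, and the prototypes are paraphrased with stdint types rather than libyuv's uint8 typedef):

    #include <stdint.h>

    // Paraphrased prototypes; the real declarations live in include/libyuv/row.h.
    void I422ToARGBRow_SSSE3(const uint8_t* src_y, const uint8_t* src_u,
                             const uint8_t* src_v, uint8_t* dst_argb, int width);
    void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb565,
                              int width);

    enum { kMaxStrideSketch = 2048 * 4 };  // assumed row bound; libyuv uses kMaxStride

    void I422ToRGB565Row_TwoStep_sketch(const uint8_t* src_y,
                                        const uint8_t* src_u,
                                        const uint8_t* src_v,
                                        uint8_t* dst_rgb565, int width) {
      // Step 1: YUV row to a full ARGB row in an aligned scratch buffer.
      uint8_t row[kMaxStrideSketch] __attribute__((aligned(16)));
      I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
      // Step 2: narrow the ARGB row to the destination format.
      ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
    }
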
#if defined(__x86_64__) || defined(__i386__) -void I422ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToRGB565Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); ARGBToRGB565Row_SSE2(row, rgb_buf, width); } #endif // defined(__x86_64__) || defined(__i386__) #if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) -void I422ToARGB1555Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB1555Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); ARGBToARGB1555Row_SSE2(row, rgb_buf, width); } -void I422ToARGB4444Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, +void I422ToARGB4444Row_SSSE3(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* rgb_buf, int width) { SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); ARGBToARGB4444Row_SSE2(row, rgb_buf, width); } @@ -1452,45 +1537,45 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y, void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { - SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - YUY2ToUV422Row_SSE2(src_yuy2, rowu, rowv, width); - YUY2ToYRow_SSE2(src_yuy2, rowy, width); - I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width); + YUY2ToYRow_SSE2(src_yuy2, row_y, width); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); } void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { - SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, rowu, rowv, width); - YUY2ToYRow_Unaligned_SSE2(src_yuy2, rowy, width); - I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width); + YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width); + I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); } void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { - SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - UYVYToUV422Row_SSE2(src_uyvy, rowu, rowv, width); - UYVYToYRow_SSE2(src_uyvy, rowy, width); - I422ToARGBRow_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width); + UYVYToYRow_SSE2(src_uyvy, row_y, width); + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); } void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { - 
SIMD_ALIGNED(uint8 rowy[kMaxStride]); - SIMD_ALIGNED(uint8 rowu[kMaxStride]); - SIMD_ALIGNED(uint8 rowv[kMaxStride]); - UYVYToUV422Row_Unaligned_SSE2(src_uyvy, rowu, rowv, width); - UYVYToYRow_Unaligned_SSE2(src_uyvy, rowy, width); - I422ToARGBRow_Unaligned_SSSE3(rowy, rowu, rowv, dst_argb, width); + SIMD_ALIGNED(uint8 row_y[kMaxStride]); + SIMD_ALIGNED(uint8 row_u[kMaxStride / 2]); + SIMD_ALIGNED(uint8 row_v[kMaxStride / 2]); + UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width); + UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width); + I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); } #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) diff --git a/source/row_mips.cc b/source/row_mips.cc index df4542fbf..48759e09d 100644 --- a/source/row_mips.cc +++ b/source/row_mips.cc @@ -225,8 +225,8 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { } #endif // HAS_MIRRORROW_MIPS_DSPR2 -#ifdef HAS_MIRRORROWUV_MIPS_DSPR2 -void MirrorRowUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +#ifdef HAS_MirrorUVRow_MIPS_DSPR2 +void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { int x = 0; int y = 0; @@ -315,7 +315,7 @@ void MirrorRowUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, "t5", "t7", "t8", "t9" ); } -#endif // HAS_MIRRORROWUV_MIPS_DSPR2 +#endif // HAS_MirrorUVRow_MIPS_DSPR2 diff --git a/source/row_neon.cc b/source/row_neon.cc index 0014e5df6..2c6643f18 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -629,9 +629,9 @@ void NV21ToARGBRow_NEON(const uint8* src_y, #ifdef HAS_NV12TORGB565ROW_NEON void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { + const uint8* src_uv, + uint8* dst_rgb565, + int width) { asm volatile ( "vld1.u8 {d24}, [%4] \n" "vld1.u8 {d25}, [%5] \n" @@ -660,9 +660,9 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, #ifdef HAS_NV21TORGB565ROW_NEON void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { + const uint8* src_uv, + uint8* dst_rgb565, + int width) { asm volatile ( "vld1.u8 {d24}, [%4] \n" "vld1.u8 {d25}, [%5] \n" @@ -955,8 +955,8 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { } #endif // HAS_MIRRORROW_NEON -#ifdef HAS_MIRRORROWUV_NEON -void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { +#ifdef HAS_MirrorUVRow_NEON +void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { asm volatile ( // compute where to start writing destination "add %1, %3 \n" // dst_a + width @@ -1013,7 +1013,7 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { : "memory", "cc", "r12", "q0" ); } -#endif // HAS_MIRRORROWUV_NEON +#endif // HAS_MirrorUVRow_NEON #ifdef HAS_BGRATOARGBROW_NEON void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix) { @@ -1112,6 +1112,41 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { } #endif // HAS_RAWTOARGBROW_NEON +#ifdef HAS_RGB565TOARGBROW_NEON + +#define RGB565TOARGB \ + "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ + "vshrn.u16 d5, q0, #5 \n" /* G xxGGGGGG */ \ + "vshrn.u16 d6, q0, #8 \n" /* R RRRRRxxx */ \ + "vshl.u8 d0, d4, #3 \n" /* B BBBBB000 upper 5 */ \ + "vshl.u8 d1, d5, #2 \n" /* G GGGGGG00 upper 6 */ \ + "vbic.u8 d2, d6, d7 \n" /* R RRRRR000 upper 5 */ \ + "vshr.u8 d4, d0, #5 \n" /* B 00000BBB lower 3 */ \ + "vshr.u8 d5, d1, #6 \n" /* G 000000GG lower 2 */ \ + "vshr.u8 d6, d2, 
#5 \n" /* R 00000RRR lower 3 */ \ + "vorr.u8 q0, q0, q2 \n" /* B,G */ \ + "vorr.u8 d2, d2, d6 \n" /* R */ + +void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { + asm volatile ( + "vmov.u8 d3, #255 \n" // Alpha + "vmov.u8 d7, #7 \n" // 5 bit mask + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565. + "subs %2, %2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_argb), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List + ); +} +#endif // HAS_RGB565TOARGBROW_NEON + #ifdef HAS_ARGBTORGBAROW_NEON void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) { asm volatile ( @@ -1436,9 +1471,9 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, ARGBTOARGB4444 "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. "bgt 1b \n" - : "+r"(src_argb), // %0 + : "+r"(src_argb), // %0 "+r"(dst_argb4444), // %1 - "+r"(pix) // %2 + "+r"(pix) // %2 : : "memory", "cc", "q0", "q8", "q9", "q10", "q11" ); @@ -1447,6 +1482,117 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, #ifdef HAS_ARGBTOYROW_NEON void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q12", "q13" + ); +} +#endif // HAS_ARGBTOYROW_NEON + +#ifdef HAS_RGB565TOYROW_NEON +void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d7, #7 \n" // 5 bit mask + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565. + "subs %2, %2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q12", "q13" + ); +} +#endif // HAS_RGB565TOYROW_NEON + + +#ifdef HAS_BGRATOYROW_NEON +void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. + "subs %2, %2, #8 \n" // 8 processed per loop. 
+ "vmull.u8 q8, d1, d4 \n" // R + "vmlal.u8 q8, d2, d5 \n" // G + "vmlal.u8 q8, d3, d6 \n" // B + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_bgra), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_BGRATOYROW_NEON + +#ifdef HAS_ABGRTOYROW_NEON +void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // R + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // B + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_ABGRTOYROW_NEON + +#ifdef HAS_RGBATOYROW_NEON +void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { asm volatile ( "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient @@ -1454,7 +1600,34 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { "vmov.u8 d7, #16 \n" // Add 16 constant ".p2align 2 \n" "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d1, d4 \n" // B + "vmlal.u8 q8, d2, d5 \n" // G + "vmlal.u8 q8, d3, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_RGBATOYROW_NEON + +#ifdef HAS_RGB24TOYROW_NEON +void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d0, d4 \n" // B "vmlal.u8 q8, d1, d5 \n" // G @@ -1463,14 +1636,41 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" - : "+r"(src_argb), // %0 + : "+r"(src_rgb24), // %0 "+r"(dst_y), // %1 "+r"(pix) // %2 : : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" ); } -#endif // HAS_ARGBTOYROW_NEON +#endif // HAS_RGB24TOYROW_NEON + +#ifdef HAS_RAWTOYROW_NEON +void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { + asm volatile ( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + ".p2align 2 \n" + "1: \n" + "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. 
+ "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // B + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_y), // %1 + "+r"(pix) // %2 + : + : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" + ); +} +#endif // HAS_RAWTOYROW_NEON #endif // __ARM_NEON__ diff --git a/source/row_posix.cc b/source/row_posix.cc index 5e26005b7..62afc05a8 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -925,6 +925,120 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, ); } +void ARGBToUV422Row_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { + asm volatile ( + "movdqa %0,%%xmm4 \n" + "movdqa %1,%%xmm3 \n" + "movdqa %2,%%xmm5 \n" + : + : "m"(kARGBToU), // %0 + "m"(kARGBToV), // %1 + "m"(kAddUV128) // %2 + ); + asm volatile ( + "sub %1,%2 \n" + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "movdqa 0x20(%0),%%xmm2 \n" + "movdqa 0x30(%0),%%xmm6 \n" + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "sub $0x10,%3 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" +#endif + ); +} + +void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { + asm volatile ( + "movdqa %0,%%xmm4 \n" + "movdqa %1,%%xmm3 \n" + "movdqa %2,%%xmm5 \n" + : + : "m"(kARGBToU), // %0 + "m"(kARGBToV), // %1 + "m"(kAddUV128) // %2 + ); + asm volatile ( + "sub %1,%2 \n" + ".p2align 4 \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "sub $0x10,%3 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" +#endif + ); +} + void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { asm volatile ( "movdqa 
%4,%%xmm5 \n" @@ -1652,7 +1766,7 @@ struct { void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1688,7 +1802,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgb24_buf, + uint8* dst_rgb24, int width) { // fpic 32 bit gcc 4.2 on OSX runs out of GPR regs. #ifdef __APPLE__ @@ -1743,7 +1857,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* raw_buf, + uint8* dst_raw, int width) { // fpic 32 bit gcc 4.2 on OSX runs out of GPR regs. #ifdef __APPLE__ @@ -1798,7 +1912,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1834,7 +1948,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1869,7 +1983,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -1901,8 +2015,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, } void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, + const uint8* src_vu, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -1936,7 +2050,7 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -1972,7 +2086,7 @@ void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2008,7 +2122,7 @@ void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2043,7 +2157,7 @@ void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2075,8 +2189,8 @@ void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* argb_buf, + const uint8* src_vu, + uint8* dst_argb, int width) { asm volatile ( "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2110,7 +2224,7 @@ void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2147,7 +2261,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, void 
OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2183,7 +2297,7 @@ void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2220,7 +2334,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2257,7 +2371,7 @@ void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2293,7 +2407,7 @@ void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { asm volatile ( "sub %[u_buf],%[v_buf] \n" @@ -2446,7 +2560,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { CONST uvec8 kShuffleMirrorUV = { 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u }; -void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { intptr_t temp_width = static_cast(width); asm volatile ( diff --git a/source/row_win.cc b/source/row_win.cc index c1b77cfc7..87da31699 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -1101,6 +1101,124 @@ __asm { } } +__declspec(naked) __declspec(align(16)) +void ARGBToUV422Row_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { +__asm { + push edi + mov eax, [esp + 4 + 4] // src_argb + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + movdqa xmm7, kARGBToU + movdqa xmm6, kARGBToV + movdqa xmm5, kAddUV128 + sub edi, edx // stride from u to v + + align 16 + convertloop: + /* step 1 - subsample 16x2 argb pixels to 8x1 */ + movdqa xmm0, [eax] + movdqa xmm1, [eax + 16] + movdqa xmm2, [eax + 32] + movdqa xmm3, [eax + 48] + lea eax, [eax + 64] + movdqa xmm4, xmm0 + shufps xmm0, xmm1, 0x88 + shufps xmm4, xmm1, 0xdd + pavgb xmm0, xmm4 + movdqa xmm4, xmm2 + shufps xmm2, xmm3, 0x88 + shufps xmm4, xmm3, 0xdd + pavgb xmm2, xmm4 + + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V + movdqa xmm1, xmm0 + movdqa xmm3, xmm2 + pmaddubsw xmm0, xmm7 // U + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm6 // V + pmaddubsw xmm3, xmm6 + phaddw xmm0, xmm2 + phaddw xmm1, xmm3 + psraw xmm0, 8 + psraw xmm1, 8 + packsswb xmm0, xmm1 + paddb xmm0, xmm5 // -> unsigned + + // step 3 - store 8 U and 8 V values + sub ecx, 16 + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V + lea edx, [edx + 8] + jg convertloop + + pop edi + ret + } +} + +__declspec(naked) __declspec(align(16)) +void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, + uint8* dst_u, uint8* dst_v, int width) { +__asm { + push edi + mov eax, [esp + 4 + 4] // src_argb + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + 
movdqa xmm7, kARGBToU + movdqa xmm6, kARGBToV + movdqa xmm5, kAddUV128 + sub edi, edx // stride from u to v + + align 16 + convertloop: + /* step 1 - subsample 16x2 argb pixels to 8x1 */ + movdqu xmm0, [eax] + movdqu xmm1, [eax + 16] + movdqu xmm2, [eax + 32] + movdqu xmm3, [eax + 48] + lea eax, [eax + 64] + movdqa xmm4, xmm0 + shufps xmm0, xmm1, 0x88 + shufps xmm4, xmm1, 0xdd + pavgb xmm0, xmm4 + movdqa xmm4, xmm2 + shufps xmm2, xmm3, 0x88 + shufps xmm4, xmm3, 0xdd + pavgb xmm2, xmm4 + + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V + movdqa xmm1, xmm0 + movdqa xmm3, xmm2 + pmaddubsw xmm0, xmm7 // U + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm6 // V + pmaddubsw xmm3, xmm6 + phaddw xmm0, xmm2 + phaddw xmm1, xmm3 + psraw xmm0, 8 + psraw xmm1, 8 + packsswb xmm0, xmm1 + paddb xmm0, xmm5 // -> unsigned + + // step 3 - store 8 U and 8 V values + sub ecx, 16 + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V + lea edx, [edx + 8] + jg convertloop + + pop edi + ret + } +} + __declspec(naked) __declspec(align(16)) void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { @@ -1656,7 +1774,7 @@ __declspec(naked) __declspec(align(16)) void I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1699,7 +1817,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRGB24Row_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgb24_buf, + uint8* dst_rgb24, int width) { __asm { push esi @@ -1746,7 +1864,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRAWRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* raw_buf, + uint8* dst_raw, int width) { __asm { push esi @@ -1866,7 +1984,7 @@ __declspec(naked) __declspec(align(16)) void I422ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1910,7 +2028,7 @@ __declspec(naked) __declspec(align(16)) void I411ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1952,7 +2070,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -1990,7 +2108,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV21ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2029,7 +2147,7 @@ __declspec(naked) __declspec(align(16)) void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2072,7 +2190,7 @@ __declspec(naked) __declspec(align(16)) void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2116,7 +2234,7 @@ __declspec(naked) __declspec(align(16)) void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2158,7 +2276,7 @@ void 
I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2196,7 +2314,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, __declspec(naked) __declspec(align(16)) void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, - uint8* argb_buf, + uint8* dst_argb, int width) { __asm { push esi @@ -2233,7 +2351,7 @@ __declspec(naked) __declspec(align(16)) void I422ToBGRARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { __asm { push esi @@ -2274,7 +2392,7 @@ __declspec(naked) __declspec(align(16)) void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* bgra_buf, + uint8* dst_bgra, int width) { __asm { push esi @@ -2315,7 +2433,7 @@ __declspec(naked) __declspec(align(16)) void I422ToABGRRow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { __asm { push esi @@ -2356,7 +2474,7 @@ __declspec(naked) __declspec(align(16)) void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* abgr_buf, + uint8* dst_abgr, int width) { __asm { push esi @@ -2397,7 +2515,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { __asm { push esi @@ -2438,7 +2556,7 @@ __declspec(naked) __declspec(align(16)) void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, - uint8* rgba_buf, + uint8* dst_rgba, int width) { __asm { push esi @@ -2591,7 +2709,7 @@ static const uvec8 kShuffleMirrorUV = { }; __declspec(naked) __declspec(align(16)) -void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width) { __asm { push edi diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index f18beb675..d8a04c71e 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -220,7 +220,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ + benchmark_width_ - 4, _Any, +, 0) \ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, _Unaligned, +, 1) \ @@ -614,10 +614,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2) TESTATOPLANAR(RGB565, 2, I420, 2, 2) TESTATOPLANAR(ARGB1555, 2, I420, 2, 2) TESTATOPLANAR(ARGB4444, 2, I420, 2, 2) -// TESTATOPLANAR(ARGB, 4, I411, 4, 1) +TESTATOPLANAR(ARGB, 4, I411, 4, 1) TESTATOPLANAR(ARGB, 4, I422, 2, 1) -// TESTATOPLANAR(ARGB, 4, I444, 1, 1) -// TODO(fbarchard): Implement and test 411 and 444 +TESTATOPLANAR(ARGB, 4, I444, 1, 1) TESTATOPLANAR(V210, 16 / 6, I420, 2, 2) TESTATOPLANAR(YUY2, 2, I420, 2, 2) TESTATOPLANAR(UYVY, 2, I420, 2, 2) @@ -629,30 +628,103 @@ TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2) TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2) TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2) -#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, \ - N, NEG, OFF) \ +#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + W1280, N, NEG, OFF) \ +TEST_F(libyuvTest, 
FMT_A##To##FMT_PLANAR##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \ + align_buffer_16(src_argb, kStride * kHeight + OFF); \ + align_buffer_16(dst_y_c, kWidth * kHeight); \ + align_buffer_16(dst_uv_c, kWidth / SUBSAMP_X * 2 * kHeight / SUBSAMP_Y); \ + align_buffer_16(dst_y_opt, kWidth * kHeight); \ + align_buffer_16(dst_uv_opt, kWidth / SUBSAMP_X * 2 * kHeight / SUBSAMP_Y); \ + srandom(time(NULL)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kStride; ++j) \ + src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \ + MaskCpuFlags(0); \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_c, kWidth, \ + dst_uv_c, kWidth / SUBSAMP_X * 2, \ + kWidth, NEG kHeight); \ + MaskCpuFlags(-1); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ + dst_y_opt, kWidth, \ + dst_uv_opt, kWidth / SUBSAMP_X * 2, \ + kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = \ + abs(static_cast(dst_y_c[i * kWidth + j]) - \ + static_cast(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 2); \ + for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \ + for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \ + int abs_diff = \ + abs(static_cast(dst_uv_c[i * kWidth / SUBSAMP_X * 2 + j]) - \ + static_cast(dst_uv_opt[i * kWidth / SUBSAMP_X * 2 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 2); \ + free_aligned_buffer_16(dst_y_c) \ + free_aligned_buffer_16(dst_uv_c) \ + free_aligned_buffer_16(dst_y_opt) \ + free_aligned_buffer_16(dst_uv_opt) \ + free_aligned_buffer_16(src_argb) \ +} + +#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) + +TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2) +TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2) + +#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ + FMT_B, BPP_B, STRIDE_B, \ + W1280, DIFF, N, NEG, OFF) \ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \ const int kWidth = W1280; \ const int kHeight = benchmark_height_; \ - align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight + OFF); \ - align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \ - align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \ + const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_16(src_argb, kStrideA * kHeight + OFF); \ + align_buffer_16(dst_argb_c, kStrideB * kHeight); \ + align_buffer_16(dst_argb_opt, kStrideB * kHeight); \ srandom(time(NULL)); \ - for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \ + for (int i = 0; i < kStrideA * kHeight; ++i) { \ src_argb[i + OFF] = (random() & 0xff); \ } \ MaskCpuFlags(0); \ - FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \ - dst_argb_c, kWidth * BPP_B, \ + FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ + dst_argb_c, kStrideB, \ kWidth, NEG kHeight); \ 
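
One detail worth noting in the reworked test macros: buffer sizes are now derived from a rounded stride, kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A, rather than kWidth * BPP_A, so macropixel formats such as YUY2 get a whole number of 4-byte Y0-U-Y1-V units even at odd widths. A quick worked check of that rounding (the widths below are examples, not values taken from the tests):

    #include <assert.h>

    // Round kWidth pixels at bpp bytes each up to a multiple of stride bytes,
    // as TESTATOBI and TESTATOBRANDOM above do.
    static int RoundedStride(int width, int bpp, int stride) {
      return (width * bpp + stride - 1) / stride * stride;
    }

    int main(void) {
      assert(RoundedStride(63, 2, 4) == 128);  // YUY2: 126 bytes rounds up to 128
      assert(RoundedStride(63, 4, 4) == 252);  // ARGB: already a multiple of 4
      return 0;
    }
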
MaskCpuFlags(-1); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \ - dst_argb_opt, kWidth * BPP_B, \ + FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ + dst_argb_opt, kStrideB, \ kWidth, NEG kHeight); \ } \ int max_diff = 0; \ - for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \ + for (int i = 0; i < kStrideB * kHeight; ++i) { \ int abs_diff = \ abs(static_cast(dst_argb_c[i]) - \ static_cast(dst_argb_opt[i])); \ @@ -665,65 +737,26 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \ free_aligned_buffer_16(dst_argb_c) \ free_aligned_buffer_16(dst_argb_opt) \ } -#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, DIFF) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Any, +, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Unaligned, +, 1) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Invert, -, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, benchmark_width_, DIFF, \ - _Opt, +, 0) -TESTATOB(ARGB, 4, 4, ARGB, 4, 0) -TESTATOB(ARGB, 4, 4, BGRA, 4, 0) -TESTATOB(ARGB, 4, 4, ABGR, 4, 0) -TESTATOB(ARGB, 4, 4, RGBA, 4, 0) -TESTATOB(ARGB, 4, 4, RAW, 3, 0) -TESTATOB(ARGB, 4, 4, RGB24, 3, 0) -TESTATOB(ARGB, 4, 4, RGB565, 2, 0) -TESTATOB(ARGB, 4, 4, ARGB1555, 2, 0) -TESTATOB(ARGB, 4, 4, ARGB4444, 2, 0) -TESTATOB(ARGB, 4, 4, BayerBGGR, 1, 0) -TESTATOB(ARGB, 4, 4, BayerRGGB, 1, 0) -TESTATOB(ARGB, 4, 4, BayerGBRG, 1, 0) -TESTATOB(ARGB, 4, 4, BayerGRBG, 1, 0) -TESTATOB(ARGB, 4, 4, I400, 1, 2) -TESTATOB(BGRA, 4, 4, ARGB, 4, 0) -TESTATOB(ABGR, 4, 4, ARGB, 4, 0) -TESTATOB(RGBA, 4, 4, ARGB, 4, 0) -TESTATOB(RAW, 3, 3, ARGB, 4, 0) -TESTATOB(RGB24, 3, 3, ARGB, 4, 0) -TESTATOB(RGB565, 2, 2, ARGB, 4, 0) -TESTATOB(ARGB1555, 2, 2, ARGB, 4, 0) -TESTATOB(ARGB4444, 2, 2, ARGB, 4, 0) -TESTATOB(YUY2, 2, 2, ARGB, 4, 0) -TESTATOB(UYVY, 2, 2, ARGB, 4, 0) -TESTATOB(M420, 3 / 2, 1, ARGB, 4, 0) -TESTATOB(BayerBGGR, 1, 1, ARGB, 4, 0) -TESTATOB(BayerRGGB, 1, 1, ARGB, 4, 0) -TESTATOB(BayerGBRG, 1, 1, ARGB, 4, 0) -TESTATOB(BayerGRBG, 1, 1, ARGB, 4, 0) -TESTATOB(I400, 1, 1, ARGB, 4, 0) -TESTATOB(I400, 1, 1, I400, 1, 0) -TESTATOB(I400, 1, 1, I400Mirror, 1, 0) -TESTATOB(Y, 1, 1, ARGB, 4, 0) -TESTATOB(ARGB, 4, 4, ARGBMirror, 4, 0) - -#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF) \ +#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ + FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \ srandom(time(NULL)); \ for (int times = 0; times < benchmark_iterations_; ++times) { \ const int kWidth = (random() & 63) + 1; \ const int kHeight = (random() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\ const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\ - align_buffer_page_end(src_argb, kStrideA * kHeight); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \ - for (int i = 0; i < kStrideA * kHeight; ++i) { \ + align_buffer_page_end(src_argb, kStrideA * kHeightA); \ + align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + for (int i = 0; i < kStrideA * kHeightA; ++i) { \ src_argb[i] = (random() & 0xff); \ } \ + memset(dst_argb_c, 0, kStrideB * kHeightB); \ + 
-#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, DIFF) \
+#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                       FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
   srandom(time(NULL)); \
   for (int times = 0; times < benchmark_iterations_; ++times) { \
     const int kWidth = (random() & 63) + 1; \
     const int kHeight = (random() & 31) + 1; \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
     const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\
     const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\
-    align_buffer_page_end(src_argb, kStrideA * kHeight); \
-    align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \
-    align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \
-    for (int i = 0; i < kStrideA * kHeight; ++i) { \
+    align_buffer_page_end(src_argb, kStrideA * kHeightA); \
+    align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
+    align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
+    for (int i = 0; i < kStrideA * kHeightA; ++i) { \
      src_argb[i] = (random() & 0xff); \
    } \
+    memset(dst_argb_c, 0, kStrideB * kHeightB); \
+    memset(dst_argb_opt, 0, kStrideB * kHeightB); \
     MaskCpuFlags(0); \
     FMT_A##To##FMT_B(src_argb, kStrideA, \
                      dst_argb_c, kStrideB, \
@@ -733,7 +766,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
                      dst_argb_opt, kStrideB, \
                      kWidth, kHeight); \
     int max_diff = 0; \
-    for (int i = 0; i < kStrideB * kHeight; ++i) { \
+    for (int i = 0; i < kStrideB * kHeightB; ++i) { \
       int abs_diff = \
           abs(static_cast<int>(dst_argb_c[i]) - \
              static_cast<int>(dst_argb_opt[i])); \
@@ -748,33 +781,58 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
   } \
 }
-TESTATOBRANDOM(ARGB, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, BGRA, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ABGR, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RGBA, 4, 4, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RAW, 3, 3, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RGB24, 3, 3, 0)
-TESTATOBRANDOM(ARGB, 4, 4, RGB565, 2, 2, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ARGB1555, 2, 2, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ARGB4444, 2, 2, 0)
-TESTATOBRANDOM(ARGB, 4, 4, I400, 1, 1, 2)
-// TODO(fbarchard): Implement YUY2
-// TESTATOBRANDOM(ARGB, 4, 4, YUY2, 4, 2, 0)
-// TESTATOBRANDOM(ARGB, 4, 4, UYVY, 4, 2, 0)
-TESTATOBRANDOM(BGRA, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ABGR, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RGBA, 4, 4, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RAW, 3, 3, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RGB24, 3, 3, ARGB, 4, 4, 0)
-TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(I400, 1, 1, ARGB, 4, 4, 0)
-TESTATOBRANDOM(YUY2, 4, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(UYVY, 4, 2, ARGB, 4, 4, 0)
-TESTATOBRANDOM(I400, 1, 1, I400, 1, 1, 0)
-TESTATOBRANDOM(I400, 1, 1, I400Mirror, 1, 1, 0)
-TESTATOBRANDOM(ARGB, 4, 4, ARGBMirror, 4, 4, 0)
+#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                 FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Any, +, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, \
+              FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Opt, +, 0) \
+    TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                   FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF)
+
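The NEG argument in the macros controls the sign of the height handed to the converter: the _Invert variants pass a negative height, which libyuv interprets as a request to flip the image vertically. A minimal illustrative call through a hypothetical helper (assuming even dimensions; not part of the patch):

#include "libyuv/convert_from_argb.h"

// Convert ARGB to I420 upside-down, the way the _Invert tests exercise the
// converters. The caller is assumed to have allocated the destination planes.
void ConvertFlipped(const uint8* argb, int width, int height,
                    uint8* y, uint8* u, uint8* v) {
  libyuv::ARGBToI420(argb, width * 4,
                     y, width,
                     u, width / 2,
                     v, width / 2,
                     width, -height);  // Negative height = vertical flip.
}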
+TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
+TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
+TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
 
 TEST_F(libyuvTest, Test565) {
   SIMD_ALIGNED(uint8 orig_pixels[256][4]);