diff --git a/Android.mk b/Android.mk
index c6f9f28b5..513a1961b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -32,6 +32,8 @@ LOCAL_SRC_FILES := \
 
 # TODO(fbarchard): Enable mjpeg encoder.
 #   source/mjpeg_decoder.cc
+#   source/convert_jpeg.cc
+#   source/mjpeg_validate.cc
 
 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
     LOCAL_CFLAGS += -DLIBYUV_NEON
diff --git a/README.chromium b/README.chromium
index 84905cb55..ce30de129 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 664
+Version: 665
 License File: LICENSE
 
 Description:
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index a932dd3a2..de9c13058 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 664
+#define LIBYUV_VERSION 665
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/memcpy_mips.S b/source/memcpy_mips.S
deleted file mode 100644
index e69de29bb..000000000
diff --git a/source/row_win.cc b/source/row_win.cc
index 6cb233324..6f71c942a 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -2089,22 +2089,29 @@ static const lvec8 kUVToB_AVX = {
   UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
   UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
 };
-
 static const lvec8 kUVToR_AVX = {
   UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR,
   UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
 };
-
 static const lvec8 kUVToG_AVX = {
   UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
   UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
 };
-
-static const lvec16 kYToRgb_AVX = { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG };
-static const lvec16 kYSub16_AVX = { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 };
-static const lvec16 kUVBiasB_AVX = { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB};
-static const lvec16 kUVBiasG_AVX = { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG};
-static const lvec16 kUVBiasR_AVX = { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR};
+static const lvec16 kYToRgb_AVX = {
+  YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
+};
+static const lvec16 kYSub16_AVX = {
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+};
+static const lvec16 kUVBiasB_AVX = {
+  BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
+};
+static const lvec16 kUVBiasG_AVX = {
+  BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG
+};
+static const lvec16 kUVBiasR_AVX = {
+  BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
+};
 
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 31d966ccb..2e76b7d13 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -28,7 +28,6 @@ extern "C" {
     (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
 #define HAS_SCALEARGBROWDOWNEVEN_NEON
 #define HAS_SCALEARGBROWDOWN2_NEON
-#define HAS_SCALEARGBFILTERROWS_NEON
 void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride,
                                int src_stepx,
                                uint8* dst_argb, int dst_width);
@@ -39,9 +38,6 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
                             uint8* dst, int dst_width);
 void ScaleARGBRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                                uint8* dst, int dst_width);
-void ScaleARGBFilterRows_NEON(uint8* dst_ptr,
-                              const uint8* src_ptr, ptrdiff_t src_stride,
-                              int dst_width, int source_y_fraction);
 #endif
 
 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
@@ -198,228 +194,6 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_argb,
   }
 }
 
-// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
-#define HAS_SCALEARGBFILTERROWS_SSE2
-__declspec(naked) __declspec(align(16))
-void ScaleARGBFilterRows_SSE2(uint8* dst_argb, const uint8* src_argb,
-                              ptrdiff_t src_stride, int dst_width,
-                              int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_argb
-    mov        esi, [esp + 8 + 8]   // src_argb
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    sub        edi, esi
-    // Dispatch to specialized filters if applicable.
-    cmp        eax, 0
-    je         xloop100  // 0 / 256.  Blend 100 / 0.
-    cmp        eax, 64
-    je         xloop75   // 64 / 256 is 0.25.  Blend 75 / 25.
-    cmp        eax, 128
-    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
-    cmp        eax, 192
-    je         xloop25   // 192 / 256 is 0.75.  Blend 25 / 75.
-
-    movd       xmm5, eax            // xmm5 = y fraction
-    punpcklbw  xmm5, xmm5
-    psrlw      xmm5, 1
-    punpcklwd  xmm5, xmm5
-    punpckldq  xmm5, xmm5
-    punpcklqdq xmm5, xmm5
-    pxor       xmm4, xmm4
-
-    align      16
-  xloop:
-    movdqa     xmm0, [esi]  // row0
-    movdqa     xmm2, [esi + edx]  // row1
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    punpcklbw  xmm2, xmm4
-    punpckhbw  xmm3, xmm4
-    punpcklbw  xmm0, xmm4
-    punpckhbw  xmm1, xmm4
-    psubw      xmm2, xmm0  // row1 - row0
-    psubw      xmm3, xmm1
-    paddw      xmm2, xmm2  // 9 bits * 15 bits = 8.16
-    paddw      xmm3, xmm3
-    pmulhw     xmm2, xmm5  // scale diff
-    pmulhw     xmm3, xmm5
-    paddw      xmm0, xmm2  // sum rows
-    paddw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      16
-  xloop25:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      16
-  xloop50:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      16
-  xloop75:
-    movdqa     xmm1, [esi]
-    movdqa     xmm0, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      16
-  xloop100:
-    movdqa     xmm0, [esi]
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop100
-
-  xloop99:
-    shufps     xmm0, xmm0, 0xff
-    movdqa     [esi + edi], xmm0    // duplicate last pixel for filtering
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
-#define HAS_SCALEARGBFILTERROWS_SSSE3
-__declspec(naked) __declspec(align(16))
-void ScaleARGBFilterRows_SSSE3(uint8* dst_argb, const uint8* src_argb,
-                               ptrdiff_t src_stride, int dst_width,
-                               int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_argb
-    mov        esi, [esp + 8 + 8]   // src_argb
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    sub        edi, esi
-    shr        eax, 1
-    cmp        eax, 0  // dispatch to specialized filters if applicable.
-    je         xloop100
-    cmp        eax, 32
-    je         xloop75
-    cmp        eax, 64
-    je         xloop50
-    cmp        eax, 96
-    je         xloop25
-
-    movd       xmm0, eax  // high fraction 0..127
-    neg        eax
-    add        eax, 128
-    movd       xmm5, eax  // low fraction 128..1
-    punpcklbw  xmm5, xmm0
-    punpcklwd  xmm5, xmm5
-    pshufd     xmm5, xmm5, 0
-
-    align      16
-  xloop:
-    movdqa     xmm0, [esi]
-    movdqa     xmm2, [esi + edx]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm2
-    punpckhbw  xmm1, xmm2
-    pmaddubsw  xmm0, xmm5
-    pmaddubsw  xmm1, xmm5
-    psrlw      xmm0, 7
-    psrlw      xmm1, 7
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      16
-  xloop25:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      16
-  xloop50:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      16
-  xloop75:
-    movdqa     xmm1, [esi]
-    movdqa     xmm0, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      16
-  xloop100:
-    movdqa     xmm0, [esi]
-    sub        ecx, 4
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop100
-
-    // Extrude last pixel.
-  xloop99:
-    shufps     xmm0, xmm0, 0xff
-    movdqa     [esi + edi], xmm0
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
 // Column scaling unfiltered. SSSE3 version.
 // TODO(fbarchard): Port to Neon
 
@@ -707,227 +481,6 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_argb,
   );
 }
 
-// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
-#define HAS_SCALEARGBFILTERROWS_SSE2
-void ScaleARGBFilterRows_SSE2(uint8* dst_argb, const uint8* src_argb,
-                              ptrdiff_t src_stride, int dst_width,
-                              int source_y_fraction) {
-  asm volatile (
-    "sub       %1,%0                           \n"
-    "shr       %3                              \n"
-    "cmp       $0x0,%3                         \n"
-    "je        100f                            \n"
-    "cmp       $0x20,%3                        \n"
-    "je        75f                             \n"
-    "cmp       $0x40,%3                        \n"
-    "je        50f                             \n"
-    "cmp       $0x60,%3                        \n"
-    "je        25f                             \n"
-
-    "movd      %3,%%xmm0                       \n"
-    "neg       %3                              \n"
-    "add       $0x80,%3                        \n"
-    "movd      %3,%%xmm5                       \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "punpcklwd %%xmm5,%%xmm5                   \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-
-    // General purpose row blend.
-    ".p2align  4                               \n"
-  "1:                                          \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "movdqa    (%1,%4,1),%%xmm2                \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm2                   \n"
-    "punpckhbw %%xmm4,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm0                   \n"
-    "punpckhbw %%xmm4,%%xmm1                   \n"
-    "psubw     %%xmm0,%%xmm2                   \n"
-    "psubw     %%xmm1,%%xmm3                   \n"
-    "paddw     %%xmm2,%%xmm2                   \n"
-    "paddw     %%xmm3,%%xmm3                   \n"
-    "pmulhw    %%xmm5,%%xmm2                   \n"
-    "pmulhw    %%xmm5,%%xmm3                   \n"
-    "paddw     %%xmm2,%%xmm0                   \n"
-    "paddw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    // Blend 25 / 75.
-    ".p2align  4                               \n"
-  "25:                                         \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "movdqa    (%1,%4,1),%%xmm1                \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        25b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 50 / 50.
-    ".p2align  4                               \n"
-  "50:                                         \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "movdqa    (%1,%4,1),%%xmm1                \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        50b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 75 / 25.
-    ".p2align  4                               \n"
-  "75:                                         \n"
-    "movdqa    (%1),%%xmm1                     \n"
-    "movdqa    (%1,%4,1),%%xmm0                \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        75b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-    ".p2align  4                               \n"
-  "100:                                        \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        100b                            \n"
-
-  "99:                                         \n"
-    "shufps    $0xff,%%xmm0,%%xmm0             \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-  : "+r"(dst_argb),   // %0
-    "+r"(src_argb),   // %1
-    "+r"(dst_width),  // %2
-    "+r"(source_y_fraction)  // %3
-  : "r"(static_cast<intptr_t>(src_stride))  // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
-#define HAS_SCALEARGBFILTERROWS_SSSE3
-void ScaleARGBFilterRows_SSSE3(uint8* dst_argb, const uint8* src_argb,
-                               ptrdiff_t src_stride, int dst_width,
-                               int source_y_fraction) {
-  asm volatile (
-    "sub       %1,%0                           \n"
-    "shr       %3                              \n"
-    "cmp       $0x0,%3                         \n"
-    "je        100f                            \n"
-    "cmp       $0x20,%3                        \n"
-    "je        75f                             \n"
-    "cmp       $0x40,%3                        \n"
-    "je        50f                             \n"
-    "cmp       $0x60,%3                        \n"
-    "je        25f                             \n"
-
-    "movd      %3,%%xmm0                       \n"
-    "neg       %3                              \n"
-    "add       $0x80,%3                        \n"
-    "movd      %3,%%xmm5                       \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "punpcklwd %%xmm5,%%xmm5                   \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-
-    // General purpose row blend.
-    ".p2align  4                               \n"
-  "1:                                          \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "movdqa    (%1,%4,1),%%xmm2                \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm2,%%xmm0                   \n"
-    "punpckhbw %%xmm2,%%xmm1                   \n"
-    "pmaddubsw %%xmm5,%%xmm0                   \n"
-    "pmaddubsw %%xmm5,%%xmm1                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    // Blend 25 / 75.
-    ".p2align  4                               \n"
-  "25:                                         \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "movdqa    (%1,%4,1),%%xmm1                \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        25b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 50 / 50.
-    ".p2align  4                               \n"
-  "50:                                         \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "movdqa    (%1,%4,1),%%xmm1                \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        50b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 75 / 25.
-    ".p2align  4                               \n"
-  "75:                                         \n"
-    "movdqa    (%1),%%xmm1                     \n"
-    "movdqa    (%1,%4,1),%%xmm0                \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        75b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-    ".p2align  4                               \n"
-  "100:                                        \n"
-    "movdqa    (%1),%%xmm0                     \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-    "lea       0x10(%1),%1                     \n"
-    "jg        100b                            \n"
-
-    // Extrude last pixel.
-  "99:                                         \n"
-    "shufps    $0xff,%%xmm0,%%xmm0             \n"
-    "movdqa    %%xmm0,(%1,%0,1)                \n"
-  : "+r"(dst_argb),    // %0
-    "+r"(src_argb),    // %1
-    "+r"(dst_width),  // %2
-    "+r"(source_y_fraction)  // %3
-  : "r"(static_cast<intptr_t>(src_stride))  // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
-  );
-}
-
 #define HAS_SCALEARGBCOLS_SSE2
 static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
                                int dst_width, int x, int dx) {
@@ -1175,51 +728,6 @@ static void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
   }
 }
 
-// C version 2x2 -> 2x1
-void ScaleARGBFilterRows_C(uint8* dst_argb, const uint8* src_argb,
-                           ptrdiff_t src_stride,
-                           int dst_width, int source_y_fraction) {
-  assert(dst_width > 0);
-  // Specialized case for 100% first row.  Helps avoid reading beyond last row.
-  if (source_y_fraction == 0) {
-    memcpy(dst_argb, src_argb, dst_width * 4);
-    dst_argb += dst_width * 4;
-    dst_argb[0] = dst_argb[-4];
-    dst_argb[1] = dst_argb[-3];
-    dst_argb[2] = dst_argb[-2];
-    dst_argb[3] = dst_argb[-1];
-    return;
-  }
-  int y1_fraction = source_y_fraction;
-  int y0_fraction = 256 - y1_fraction;
-  const uint8* src_ptr1 = src_argb + src_stride;
-  for (int x = 0; x < dst_width - 1; x += 2) {
-    dst_argb[0] = (src_argb[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
-    dst_argb[1] = (src_argb[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
-    dst_argb[2] = (src_argb[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8;
-    dst_argb[3] = (src_argb[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8;
-    dst_argb[4] = (src_argb[4] * y0_fraction + src_ptr1[4] * y1_fraction) >> 8;
-    dst_argb[5] = (src_argb[5] * y0_fraction + src_ptr1[5] * y1_fraction) >> 8;
-    dst_argb[6] = (src_argb[6] * y0_fraction + src_ptr1[6] * y1_fraction) >> 8;
-    dst_argb[7] = (src_argb[7] * y0_fraction + src_ptr1[7] * y1_fraction) >> 8;
-    src_argb += 8;
-    src_ptr1 += 8;
-    dst_argb += 8;
-  }
-  if (dst_width & 1) {
-    dst_argb[0] = (src_argb[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
-    dst_argb[1] = (src_argb[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
-    dst_argb[2] = (src_argb[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8;
-    dst_argb[3] = (src_argb[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8;
-    dst_argb += 4;
-  }
-  // Duplicate the last pixel (4 bytes) for filtering.
-  dst_argb[0] = dst_argb[-4];
-  dst_argb[1] = dst_argb[-3];
-  dst_argb[2] = dst_argb[-2];
-  dst_argb[3] = dst_argb[-1];
-}
-
 // ScaleARGB ARGB, 1/2
 // This is an optimized version for scaling down a ARGB to 1/2 of
 // its original size.
diff --git a/source/scale_argb_neon.cc b/source/scale_argb_neon.cc
index 8f438628c..720b72e22 100644
--- a/source/scale_argb_neon.cc
+++ b/source/scale_argb_neon.cc
@@ -135,90 +135,6 @@ void ScaleARGBRowDownEvenInt_NEON(const uint8* src_argb, ptrdiff_t src_stride,
   : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
   );
 }
-
-// 4x2 -> 4x1
-void ScaleARGBFilterRows_NEON(uint8* dst_ptr,
-                              const uint8* src_ptr, ptrdiff_t src_stride,
-                              int dst_width, int source_y_fraction) {
-  asm volatile (
-    "cmp          %4, #0                       \n"
-    "beq          100f                         \n"
-    "add          %2, %1                       \n"
-    "cmp          %4, #64                      \n"
-    "beq          75f                          \n"
-    "cmp          %4, #128                     \n"
-    "beq          50f                          \n"
-    "cmp          %4, #192                     \n"
-    "beq          25f                          \n"
-
-    "vdup.8       d5, %4                       \n"
-    "rsb          %4, #256                     \n"
-    "vdup.8       d4, %4                       \n"
-    // General purpose row blend.
-  "1:                                          \n"
-    "vld1.u8      {q0}, [%1]!                  \n"
-    "vld1.u8      {q1}, [%2]!                  \n"
-    "subs         %3, #4                       \n"
-    "vmull.u8     q13, d0, d4                  \n"
-    "vmull.u8     q14, d1, d4                  \n"
-    "vmlal.u8     q13, d2, d5                  \n"
-    "vmlal.u8     q14, d3, d5                  \n"
-    "vrshrn.u16   d0, q13, #8                  \n"
-    "vrshrn.u16   d1, q14, #8                  \n"
-    "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          1b                           \n"
-    "b            99f                          \n"
-
-    // Blend 25 / 75.
-  "25:                                         \n"
-    "vld1.u8      {q0}, [%1]!                  \n"
-    "vld1.u8      {q1}, [%2]!                  \n"
-    "subs         %3, #4                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          25b                          \n"
-    "b            99f                          \n"
-
-    // Blend 50 / 50.
-  "50:                                         \n"
-    "vld1.u8      {q0}, [%1]!                  \n"
-    "vld1.u8      {q1}, [%2]!                  \n"
-    "subs         %3, #4                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          50b                          \n"
-    "b            99f                          \n"
-
-    // Blend 75 / 25.
-  "75:                                         \n"
-    "vld1.u8      {q1}, [%1]!                  \n"
-    "vld1.u8      {q0}, [%2]!                  \n"
-    "subs         %3, #4                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          75b                          \n"
-    "b            99f                          \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-  "100:                                        \n"
-    "vld1.u8      {q0}, [%1]!                  \n"
-    "subs         %3, #4                       \n"
-    "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          100b                         \n"
-
-  "99:                                         \n"
-    "vst1.u32     {d1[1]}, [%0]                \n"
-  : "+r"(dst_ptr),          // %0
-    "+r"(src_ptr),          // %1
-    "+r"(src_stride),       // %2
-    "+r"(dst_width),        // %3
-    "+r"(source_y_fraction) // %4
-  :
-  : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
-  );
-}
 #endif  // __ARM_NEON__
 
 #ifdef __cplusplus
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 4672443b1..7e96c63a4 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -458,17 +458,15 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) {                                 \
 
 #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,   \
                       DIFF, FMT_C, BPP_C)                                      \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                   benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C)       \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                   benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C)     \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
+                   benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C)        \
     TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
                    benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
 
-//            TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-//                           benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C)       \
-//            TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-//                           benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C)     \
-//            TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,      \
-//                           benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C)        \
-
-
 TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 2, ARGB, 4)
 TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 2, ARGB, 4)
 TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 2, ARGB, 4)
diff --git a/util/psnr_main.cc b/util/psnr_main.cc
index e9a9aa2b1..314ec54f6 100644
--- a/util/psnr_main.cc
+++ b/util/psnr_main.cc
@@ -172,7 +172,7 @@ void ParseOptions(int argc, const char* argv[]) {
     fprintf(stderr, "Number of frames incorrect\n");
     PrintHelp(argv[0]);
   }
-  if (image_width <= 0 || image_height <=0) {
+  if (image_width <= 0 || image_height <= 0) {
     int org_width, org_height;
     int rec_width, rec_height;
     bool org_res_avail = ExtractResolutionFromFilename(argv[fileindex_org],
@@ -307,7 +307,7 @@ int main(int argc, const char* argv[]) {
   }
 
   const int y_size = image_width * image_height;
-  const int uv_size = (image_width >> 1) * (image_height >> 1);
+  const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
   const size_t total_size = y_size + 2 * uv_size;    // NOLINT
 #if defined(_MSC_VER)
   _fseeki64(file_org,