diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index 57cda4c85..48a645666 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -1061,7 +1061,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
   // Coalesce contiguous rows.
   if (src_stride_argb == width * 4 &&
       dst_stride_yj == width) {
-    return ARGBToI400(src_argb, 0,
+    return ARGBToJ400(src_argb, 0,
                       dst_yj, 0,
                       width * height, 1);
   }
diff --git a/source/row_common.cc b/source/row_common.cc
index f06847000..538b04db9 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -256,8 +256,23 @@ MAKEROWY(RGB24, 2, 1, 0, 3)
 MAKEROWY(RAW, 0, 1, 2, 3)
 #undef MAKEROWY
 
+// BT.601 mpeg range
+// b 0.1016 * 255 = 25.908 = 25
+// g 0.5078 * 255 = 129.489 = 129
+// r 0.2578 * 255 = 65.739 = 66
+// sum = 0.1016 + 0.5078 + 0.2578 = 0.8672.  1 / 0.8672 = 1.1531
+// BT.601 full range 8 bit (not used)
+// b 0.1016 * 1.1531 = 0.1172 * 255 = 29.886 = 30
+// g 0.5078 * 1.1531 = 0.5855 * 255 = 149.3025 = 149
+// r 0.2578 * 1.1531 = 0.2973 * 255 = 75.8115 = 76
+// 30 + 149 + 76 = 255
+// BT.601 full range 7 bit (15 + 74 + 38 = 127)
+// b 0.1172 * 127 = 14.8844 = 15
+// g 0.5855 * 127 = 74.3585 = 74
+// r 0.2973 * 127 = 37.7571 = 38
+
 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
-  return (66 * r + 129 * g +  25 * b + 0x0080) >> 8;
+  return (38 * r + 74 * g +  15 * b + 64) >> 7;  // +64 rounds to nearest.
 }
 
 #define MAKEROWYJ(NAME, R, G, B, BPP) \
diff --git a/source/row_neon.cc b/source/row_neon.cc
index e8f14ec1b..a22908b26 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1338,9 +1338,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
 
 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
-    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d24, #15                       \n"  // B * 0.1172 coefficient
+    "vmov.u8    d25, #74                       \n"  // G * 0.5855 coefficient
+    "vmov.u8    d26, #38                       \n"  // R * 0.2973 coefficient
     ".p2align  2                               \n"
   "1:                                          \n"
     "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
diff --git a/source/row_posix.cc b/source/row_posix.cc
index db2e5f5cc..4f722c726 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -35,6 +35,11 @@ CONST vec8 kARGBToY = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
 };
 
+// JPEG full range luma weights in 7 bit fixed point: B, G, R, then 0.
+CONST vec8 kARGBToYJ = {
+  15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0
+};
+
 CONST vec8 kARGBToU = {
   112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
 };
@@ -86,6 +91,10 @@ CONST uvec8 kAddY16 = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
 };
 
+CONST vec16 kAddYJ64 = {  // Rounding bias added before the shift by 7.
+  64, 64, 64, 64, 64, 64, 64, 64
+};
+
 CONST uvec8 kAddUV128 = {
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
@@ -645,6 +654,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
     "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
@@ -658,6 +668,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     "lea       0x40(%0),%0                     \n"
     "phaddw    %%xmm1,%%xmm0                   \n"
     "phaddw    %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm2                   \n"
     "psrlw     $0x7,%%xmm0                     \n"
     "psrlw     $0x7,%%xmm2                     \n"
     "packuswb  %%xmm2,%%xmm0                   \n"
@@ -668,10 +680,11 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   : "+r"(src_argb),  // %0
     "+r"(dst_y),     // %1
     "+r"(pix)        // %2
-  : "m"(kARGBToY)    // %3
+  : "m"(kARGBToYJ),  // %3
+    "m"(kAddYJ64)    // %4
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 #endif
   );
 }
@@ -716,6 +729,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
     "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqu    (%0),%%xmm0                     \n"
@@ -729,6 +743,8 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     "lea       0x40(%0),%0                     \n"
     "phaddw    %%xmm1,%%xmm0                   \n"
     "phaddw    %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm2                   \n"
     "psrlw     $0x7,%%xmm0                     \n"
     "psrlw     $0x7,%%xmm2                     \n"
     "packuswb  %%xmm2,%%xmm0                   \n"
@@ -739,13 +755,15 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   : "+r"(src_argb),  // %0
     "+r"(dst_y),     // %1
     "+r"(pix)        // %2
-  : "m"(kARGBToY)    // %3
+  : "m"(kARGBToYJ),  // %3
+    "m"(kAddYJ64)    // %4
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 #endif
   );
 }
+
 // TODO(fbarchard): pass xmm constants to single block of assembly.
 // fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
 // 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
diff --git a/source/row_win.cc b/source/row_win.cc
index 78ee99860..d56ffd717 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -25,6 +25,11 @@ static const vec8 kARGBToY = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
 };
 
+// JPEG full range luma weights in 7 bit fixed point: B, G, R, then 0.
+static const vec8 kARGBToYJ = {
+  15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0
+};
+
 static const lvec8 kARGBToY_AVX = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0,
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
@@ -103,6 +108,10 @@ static const uvec8 kAddY16 = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
 };
 
+static const vec16 kAddYJ64 = {  // Rounding bias added before the shift by 7.
+  64, 64, 64, 64, 64, 64, 64, 64
+};
+
 static const ulvec8 kAddY16_AVX = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
@@ -671,7 +680,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     mov        eax, [esp + 4]   /* src_argb */
     mov        edx, [esp + 8]   /* dst_y */
     mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm4, kARGBToY
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
 
     align      16
  convertloop:
@@ -686,6 +696,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     lea        eax, [eax + 64]
     phaddw     xmm0, xmm1
     phaddw     xmm2, xmm3
+    paddw      xmm0, xmm5
+    paddw      xmm2, xmm5
     psrlw      xmm0, 7
     psrlw      xmm2, 7
     packuswb   xmm0, xmm2
@@ -776,7 +788,8 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     mov        eax, [esp + 4]   /* src_argb */
     mov        edx, [esp + 8]   /* dst_y */
     mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm4, kARGBToY
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
 
     align      16
  convertloop:
@@ -791,6 +804,8 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     lea        eax, [eax + 64]
     phaddw     xmm0, xmm1
     phaddw     xmm2, xmm3
+    paddw      xmm0, xmm5
+    paddw      xmm2, xmm5
     psrlw      xmm0, 7
     psrlw      xmm2, 7
     packuswb   xmm0, xmm2
diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc
index d35ecb730..225e75736 100644
--- a/unit_test/compare_test.cc
+++ b/unit_test/compare_test.cc
@@ -203,7 +203,9 @@ TEST_F(libyuvTest, Psnr) {
                       kSrcWidth, kSrcHeight);
 
   EXPECT_GT(err, 4.0);
-  EXPECT_LT(err, 5.0);
+  if (kSrcWidth * kSrcHeight >= 256) {
+    EXPECT_LT(err, 5.0);
+  }
 
   srandom(time(NULL));
 
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 5edcb78e0..2cfd20001 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -35,7 +35,7 @@ namespace libyuv {
 #define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,           \
                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF)   \
 TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {                        \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   align_buffer_64(src_y, kWidth * kHeight + OFF);                              \
   align_buffer_64(src_u,                                                       \
@@ -170,7 +170,7 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
 #define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,          \
                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF)   \
 TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {                        \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   align_buffer_64(src_y, kWidth * kHeight + OFF);                              \
   align_buffer_64(src_u,                                                       \
@@ -273,7 +273,7 @@ TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
 #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,         \
                          FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
 TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {                        \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   align_buffer_64(src_y, kWidth * kHeight + OFF);                              \
   align_buffer_64(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) *               \
@@ -389,7 +389,7 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
 #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
                        W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C)                 \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) {                                 \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   const int kStrideB = ((kWidth * 8 * BPP_B + 7) / 8 + ALIGN - 1) /            \
       ALIGN * ALIGN;                                                           \
@@ -503,7 +503,7 @@ TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 0, ARGB, 4)
 #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,       \
                          W1280, DIFF, N, NEG, OFF)                             \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) {                                 \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   const int kStrideB = kWidth * BPP_B;                                         \
   align_buffer_64(src_y, kWidth * kHeight + OFF);                              \
@@ -582,7 +582,7 @@ TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
 #define TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,         \
                        W1280, DIFF, N, NEG, OFF)                               \
 TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) {                                 \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   const int kStride = (kWidth * 8 * BPP_A + 7) / 8;                            \
   align_buffer_64(src_argb, kStride * kHeight + OFF);                          \
@@ -712,7 +712,7 @@ TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2, 4)
 #define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,       \
                        W1280, N, NEG, OFF)                                     \
 TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) {                                 \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   const int kStride = (kWidth * 8 * BPP_A + 7) / 8;                            \
   align_buffer_64(src_argb, kStride * kHeight + OFF);                          \
@@ -789,7 +789,7 @@ TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2)
                   FMT_B, BPP_B, STRIDE_B,                                      \
                   W1280, DIFF, N, NEG, OFF)                                    \
 TEST_F(libyuvTest, FMT_A##To##FMT_B##N) {                                      \
-  const int kWidth = W1280;                                                    \
+  const int kWidth = W1280 > 1 ? W1280 : 1;                                    \
   const int kHeight = benchmark_height_;                                       \
   const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;  \
   const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;  \
@@ -814,6 +814,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) {                                      \
   }                                                                            \
   int max_diff = 0;                                                            \
   for (int i = 0; i < kStrideB * kHeight; ++i) {                               \
+    EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF);                         \
     int abs_diff =                                                             \
         abs(static_cast<int>(dst_argb_c[i]) -                                  \
             static_cast<int>(dst_argb_opt[i]));                                \
@@ -859,6 +860,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) {                                \
       int abs_diff =                                                           \
           abs(static_cast<int>(dst_argb_c[i]) -                                \
               static_cast<int>(dst_argb_opt[i]));                              \
+      EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF);                       \
       if (abs_diff > max_diff) {                                               \
         max_diff = abs_diff;                                                   \
       }                                                                        \
@@ -903,7 +905,7 @@ TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
 TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
 TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
 TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
-TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 0)
 TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
 TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
 TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 94284b66b..506d11a8c 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -107,6 +107,9 @@ TEST_F(libyuvTest, TestAttenuate) {
 
 static int TestAttenuateI(int width, int height, int benchmark_iterations,
                           int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb, kStride * height + off);
@@ -170,6 +173,9 @@ TEST_F(libyuvTest, ARGBAttenuate_Opt) {
 
 static int TestUnattenuateI(int width, int height, int benchmark_iterations,
                             int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb, kStride * height + off);
@@ -787,6 +793,9 @@ TESTINTERPOLATE(85)
 
 static int TestBlend(int width, int height, int benchmark_iterations,
                      int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = width * kBpp;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1101,6 +1110,9 @@ TEST_F(libyuvTest, TestCopyPlane) {
 
 static int TestMultiply(int width, int height, int benchmark_iterations,
                         int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1169,6 +1181,9 @@ TEST_F(libyuvTest, ARGBMultiply_Opt) {
 
 static int TestAdd(int width, int height, int benchmark_iterations,
                    int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1237,6 +1252,9 @@ TEST_F(libyuvTest, ARGBAdd_Opt) {
 
 static int TestSubtract(int width, int height, int benchmark_iterations,
                         int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1305,6 +1323,9 @@ TEST_F(libyuvTest, ARGBSubtract_Opt) {
 
 static int TestSobel(int width, int height, int benchmark_iterations,
                      int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1368,6 +1389,9 @@ TEST_F(libyuvTest, ARGBSobel_Opt) {
 
 static int TestSobelXY(int width, int height, int benchmark_iterations,
                      int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);