diff --git a/README.chromium b/README.chromium
index b3da154c1..839f3b0f3 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 398
+Version: 399
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 4814f2544..ebbc4572c 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -100,6 +100,8 @@ extern "C" {
 #define HAS_RGBATOARGBROW_SSSE3
 #define HAS_RGBATOUVROW_SSSE3
 #define HAS_RGBATOYROW_SSSE3
+#define HAS_I422TORGB24ROW_SSSE3
+#define HAS_I422TORAWROW_SSSE3
 #endif
 
 // The following are disabled when SSSE3 is available:
@@ -436,6 +438,19 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
                          uint8* rgba_buf,
                          int width);
 
+// RGB24/RAW are unaligned.
+void I422ToRGB24Row_SSSE3(const uint8* y_buf,
+                          const uint8* u_buf,
+                          const uint8* v_buf,
+                          uint8* rgb_buf,
+                          int width);
+
+void I422ToRAWRow_SSSE3(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width);
+
 void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
                                    const uint8* u_buf,
                                    const uint8* v_buf,
@@ -528,6 +543,19 @@ void I422ToRGBARow_Any_SSSE3(const uint8* y_buf,
                              uint8* rgba_buf,
                              int width);
 
+// RGB24/RAW are unaligned.
+void I422ToRGB24Row_Any_SSSE3(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width);
+
+void I422ToRAWRow_Any_SSSE3(const uint8* y_buf,
+                            const uint8* u_buf,
+                            const uint8* v_buf,
+                            uint8* rgb_buf,
+                            int width);
+
 void YToARGBRow_SSE2(const uint8* y_buf,
                      uint8* argb_buf,
                      int width);
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index b12c2e7ac..d761e3295 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 398
+#define LIBYUV_VERSION 399
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 4ea974acf..817dbd8b6 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -928,10 +928,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
     I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
     if (IS_ALIGNED(width, 8)) {
-      I422ToRGB24Row = I422ToRGB24Row_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
-        I422ToRGB24Row = I422ToRGB24Row_SSSE3;
-      }
+      I422ToRGB24Row = I422ToRGB24Row_SSSE3;
     }
   }
 #endif
@@ -982,10 +979,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
     I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
     if (IS_ALIGNED(width, 8)) {
-      I422ToRAWRow = I422ToRAWRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
-        I422ToRAWRow = I422ToRAWRow_SSSE3;
-      }
+      I422ToRAWRow = I422ToRAWRow_SSSE3;
     }
   }
 #endif
diff --git a/source/row_common.cc b/source/row_common.cc
index c5f3ce050..e0e426cd8 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1023,9 +1023,9 @@ YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
 YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
 #endif
 #ifdef HAS_I422TORGB24ROW_SSSE3
-YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_Unaligned_SSSE3,                 \
-     I422ToRGB24Row_C, 1)
-YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_Unaligned_SSSE3, I422ToRAWRow_C, 1)
+// I422ToRGB24Row_SSSE3 is unaligned.
+YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1)
+YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1)
 #endif
 #ifdef HAS_I422TORGBAROW_SSSE3
 YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1)
diff --git a/source/row_win.cc b/source/row_win.cc
index e3b01f27f..de70b9435 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -122,6 +122,16 @@ static const uvec8 kShuffleMaskARGBToRAW = {
   2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
 };
 
+// Shuffle table for converting ARGBToRGB24 for I420ToRGB24.  First 8 + next 4
+static const uvec8 kShuffleMaskARGBToRGB24_0 = {
+  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
+};
+
+// Shuffle table for converting ARGB to RAW.
+static const uvec8 kShuffleMaskARGBToRAW_0 = {
+  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
+};
+
 __declspec(naked) __declspec(align(16))
 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
   __asm {
@@ -1654,6 +1664,100 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
   }
 }
 
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToRGB24Row_SSSE3(const uint8* y_buf,
+                          const uint8* u_buf,
+                          const uint8* v_buf,
+                          uint8* rgb24_buf,
+                          int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // rgb24
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+    movdqa     xmm5, kShuffleMaskARGBToRGB24_0
+    movdqa     xmm6, kShuffleMaskARGBToRGB24
+
+    align      16
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RRGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm2           // RR
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
+    pshufb     xmm0, xmm5           // Pack into first 8 and last 4 bytes.
+    pshufb     xmm1, xmm6           // Pack into first 12 bytes.
+    palignr    xmm1, xmm0, 12       // last 4 bytes of xmm0 + 12 from xmm1
+    movq       qword ptr [edx], xmm0  // First 8 bytes
+    movdqu     [edx + 8], xmm1      // Last 16 bytes. = 24 bytes, 8 RGB pixels.
+    lea        edx,  [edx + 24]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToRAWRow_SSSE3(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* raw_buf,
+                        int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // raw
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+    movdqa     xmm5, kShuffleMaskARGBToRAW_0
+    movdqa     xmm6, kShuffleMaskARGBToRAW
+
+    align      16
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RRGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm2           // RR
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
+    pshufb     xmm0, xmm5           // Pack into first 8 and last 4 bytes.
+    pshufb     xmm1, xmm6           // Pack into first 12 bytes.
+    palignr    xmm1, xmm0, 12       // last 4 bytes of xmm0 + 12 from xmm1
+    movq       qword ptr [edx], xmm0  // First 8 bytes
+    movdqu     [edx + 8], xmm1      // Last 16 bytes. = 24 bytes, 8 RGB pixels.
+    lea        edx,  [edx + 24]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
 // 8 pixels, dest aligned 16.
 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 __declspec(naked) __declspec(align(16))