From 304a611d800dbe5c26bad37733a479e00c8e5138 Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com"
Date: Thu, 7 Mar 2013 21:48:02 +0000
Subject: [PATCH] Unaligned version of ABGRToARGB for SSSE3

BUG=196
TESTED=libyuv_unittest --gtest_filter=*ABGRToARGB*
Review URL: https://webrtc-codereview.appspot.com/1166006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@595 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium          |  2 +-
 include/libyuv/row.h     |  2 ++
 include/libyuv/version.h |  2 +-
 source/convert_argb.cc   | 12 ++++++++----
 source/row_win.cc        | 22 ++++++++++++++++++++++
 5 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/README.chromium b/README.chromium
index 8f1dccf34..9fc6df6c6 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 594
+Version: 595
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 119c2aa22..5bf7b6481 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -635,6 +635,8 @@ void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
 
 void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
 void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
+void ABGRToARGBRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_argb,
+                                   int pix);
 void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix);
 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index a3c224299..710679854 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 594
+#define LIBYUV_VERSION 595
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 7db0be32c..576c136bc 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -350,10 +350,14 @@ int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
   void (*ABGRToARGBRow)(const uint8* src_abgr, uint8* dst_argb, int pix) =
       ABGRToARGBRow_C;
 #if defined(HAS_ABGRTOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ABGRToARGBRow = ABGRToARGBRow_SSSE3;
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
+    // TODO(fbarchard): Port to posix.
+#if defined(_M_IX86)
+    ABGRToARGBRow = ABGRToARGBRow_Unaligned_SSSE3;
+#endif
+    if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16) &&
+        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16))
+      ABGRToARGBRow = ABGRToARGBRow_SSSE3;
   }
 #elif defined(HAS_ABGRTOARGBROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
diff --git a/source/row_win.cc b/source/row_win.cc
index 07f1f035c..902deebe2 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -251,6 +251,28 @@ void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
   }
 }
 
+__declspec(naked) __declspec(align(16))
+void ABGRToARGBRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_argb,
+                                   int pix) {
+__asm {
+    mov       eax, [esp + 4]   // src_abgr
+    mov       edx, [esp + 8]   // dst_argb
+    mov       ecx, [esp + 12]  // pix
+    movdqa    xmm5, kShuffleMaskABGRToARGB
+    sub       edx, eax
+
+    align      16
+ convertloop:
+    movdqu    xmm0, [eax]
+    pshufb    xmm0, xmm5
+    sub       ecx, 4
+    movdqu    [eax + edx], xmm0
+    lea       eax, [eax + 16]
+    jg        convertloop
+    ret
+  }
+}
+
 __declspec(naked) __declspec(align(16))
 void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
 __asm {