From 278d88f872154b32bc93363d90ea605955dbad3d Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Thu, 13 Aug 2015 15:05:14 -0700
Subject: [PATCH] Copy Alpha odd width support

R=harryjin@google.com
BUG=none

Review URL: https://webrtc-codereview.appspot.com/59369004.
---
Reviewer notes (text in this section is not part of the commit message):
- row_any.cc: ARGBCopyAlphaRow_Any_SSE2 is guarded by
  HAS_ARGBCOPYALPHAROW_SSE2 (was HAS_ARGBCOPYYTOALPHAROW_SSE2), matching the
  guard used at its call site in planar_functions.cc and the AVX2 sibling.
- row_any.cc: the ARGBCopyAlphaRow_Any_* wrappers pass 4 (not 1) as the
  source bytes-per-pixel argument, because their source is an ARGB row; the
  ARGBCopyYToAlphaRow_Any_* wrappers keep 1 because their source is a Y row.
- NOTE(review): the ANY11 macro body is not visible in this patch. These row
  functions presumably modify only the alpha byte of each destination pixel;
  confirm that ANY11's handling of the remainder pixels does not overwrite
  the destination's colour bytes.

 include/libyuv/row.h       |  8 ++++++++
 source/convert_argb.cc     | 14 ++++++++++----
 source/planar_functions.cc | 28 ++++++++++++++++++++--------
 source/row_any.cc          | 12 ++++++++++++
 4 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index ebae3e719..2285c3416 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -816,10 +816,18 @@ void CopyRow_16_C(const uint16* src, uint16* dst, int count);
 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
+                               int width);
+void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
+                               int width);
 
 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb,
+                                  int width);
+void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb,
+                                  int width);
 
 void SetRow_C(uint8* dst, uint8 v8, int count);
 void SetRow_X86(uint8* dst, uint8 v8, int count);
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 87434b172..1f8ee632c 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -319,13 +319,19 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
   }
 #endif
 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
-    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+    }
   }
 #endif
 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
-    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+    }
   }
 #endif
 #if defined(HAS_ARGBATTENUATEROW_SSE2)
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index b96bd5020..3cca5f48b 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -2280,13 +2280,19 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
     src_stride_argb = dst_stride_argb = 0;
   }
 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
-    ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
+    }
   }
 #endif
 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
-    ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
+    }
   }
 #endif
 
@@ -2323,13 +2329,19 @@ int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
     src_stride_y = dst_stride_argb = 0;
   }
 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
-    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+    }
   }
 #endif
 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
-    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+    }
   }
 #endif
 
diff --git a/source/row_any.cc b/source/row_any.cc
index 1cb1f6b93..2ef5359bd 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -245,6 +245,18 @@ ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
 #ifdef HAS_COPYROW_NEON
 ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
 #endif
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+ANY11(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
+ANY11(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+ANY11(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+ANY11(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
+#endif
 #if defined(HAS_ARGBTORGB24ROW_SSSE3)
 ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
 ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)