libyuv: replace the AVX2-gated CopyRow_AVX2 with CopyRow_ERMS.

Summary of the hunks below:
  * row.h: define HAS_COPYROW_ERMS next to HAS_COPYROW_X86, drop
    HAS_COPYROW_AVX2 from the _M_IX86-only block, and rename the prototype.
  * convert.cc, convert_from.cc, planar_functions.cc, rotate.cc,
    rotate_argb.cc: select CopyRow_ERMS when TestCpuFlag(kCpuHasERMS) is set,
    where CopyRow_AVX2 / kCpuHasAVX2 was used before.
  * row_posix.cc: add a GCC-style inline-asm CopyRow_ERMS built on "rep movsb".
  * row_win.cc: rename the Visual C CopyRow_AVX2 to CopyRow_ERMS and remove its
    HAS_COPYROW_AVX2 guard.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed and whose static_cast template argument had
been stripped. Line boundaries and blank lines were restored from the hunk
line counts; leading indentation is reconstructed, so apply with
whitespace-insensitive matching (e.g. "git apply --ignore-whitespace" or
"patch -l") and confirm against upstream.
NOTE(review): the convert.cc CopyPlane2 hunk keeps the "Detect Fast String
support" TODO as context although the other call sites delete it; it looks
stale now that kCpuHasERMS is tested. Left untouched here.

diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 2b1eb462a..62a9f5d2a 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -61,6 +61,7 @@ extern "C" {
 #define HAS_BGRATOYROW_SSSE3
 #define HAS_COPYROW_SSE2
 #define HAS_COPYROW_X86
+#define HAS_COPYROW_ERMS
 #define HAS_HALFROW_SSE2
 #define HAS_I400TOARGBROW_SSE2
 #define HAS_I411TOARGBROW_SSSE3
@@ -130,7 +131,6 @@ extern "C" {
 // TODO(fbarchard): Port to gcc.
 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
 #define HAS_ARGBCOLORTABLEROW_X86
-#define HAS_COPYROW_AVX2
 // Visual C 2012 required for AVX2.
 #if _MSC_VER >= 1700
 // TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
@@ -649,7 +649,7 @@ void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                          int width);
 
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_AVX2(const uint8* src, uint8* dst, int count);
+void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
 void CopyRow_X86(const uint8* src, uint8* dst, int count);
 void CopyRow_NEON(const uint8* src, uint8* dst, int count);
 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
diff --git a/source/convert.cc b/source/convert.cc
index 8d4551d4e..1e066d5ec 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -308,10 +308,10 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
     CopyRow = CopyRow_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_AVX2)
+#if defined(HAS_COPYROW_ERMS)
   // TODO(fbarchard): Detect Fast String support.
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    CopyRow = CopyRow_AVX2;
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
   }
 #endif
 #if defined(HAS_COPYROW_NEON)
@@ -539,9 +539,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
     CopyRow = CopyRow_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    CopyRow = CopyRow_AVX2;
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
   }
 #endif
 #if defined(HAS_COPYROW_MIPS)
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 93f8bfd86..bb7c35b6f 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -64,9 +64,9 @@ int I420ToI422(const uint8* src_y, int src_stride_y,
     CopyRow = CopyRow_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    CopyRow = CopyRow_AVX2;
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
   }
 #endif
 #if defined(HAS_COPYROW_NEON)
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 72dff8b65..4642e5ca9 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -49,10 +49,9 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
     CopyRow = CopyRow_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_AVX2)
-  // TODO(fbarchard): Detect Fast String support.
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    CopyRow = CopyRow_AVX2;
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
   }
 #endif
 #if defined(HAS_COPYROW_NEON)
diff --git a/source/rotate.cc b/source/rotate.cc
index 682737224..8a4ede9f2 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -911,10 +911,9 @@ void RotatePlane180(const uint8* src, int src_stride,
     CopyRow = CopyRow_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_AVX2)
-  // TODO(fbarchard): Detect Fast String support.
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    CopyRow = CopyRow_AVX2;
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
   }
 #endif
 #if defined(HAS_COPYROW_MIPS)
diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc
index 38536f05c..0a85d38e8 100644
--- a/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -128,10 +128,9 @@ void ARGBRotate180(const uint8* src, int src_stride,
     CopyRow = CopyRow_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_AVX2)
-  // TODO(fbarchard): Detect Fast String support.
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    CopyRow = CopyRow_AVX2;
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
   }
 #endif
 #if defined(HAS_COPYROW_MIPS)
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 42dd798e2..76d1b4af5 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -3027,6 +3027,19 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
 }
 #endif // HAS_COPYROW_X86
 
+// Unaligned Multiple of 1.
+void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
+  size_t width_tmp = static_cast<size_t>(width);
+  asm volatile (
+    "rep movsb \n"
+  : "+S"(src), // %0
+    "+D"(dst), // %1
+    "+c"(width_tmp) // %2
+  :
+  : "memory", "cc"
+  );
+}
+
 #ifdef HAS_SETROW_X86
 void SetRow_X86(uint8* dst, uint32 v32, int width) {
   size_t width_tmp = static_cast<size_t>(width);
diff --git a/source/row_win.cc b/source/row_win.cc
index 3ec2ed472..5a1ec83e7 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -3497,10 +3497,9 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
 }
 #endif // HAS_COPYROW_SSE2
 
-#ifdef HAS_COPYROW_AVX2
 // Unaligned Multiple of 1.
 __declspec(naked) __declspec(align(16))
-void CopyRow_AVX2(const uint8* src, uint8* dst, int count) {
+void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
   __asm {
     mov eax, esi
     mov edx, edi
@@ -3513,7 +3512,6 @@ void CopyRow_AVX2(const uint8* src, uint8* dst, int count) {
     ret
   }
 }
-#endif // HAS_COPYROW_AVX2
 
 #ifdef HAS_COPYROW_X86
 __declspec(naked) __declspec(align(16))