mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Add an Enhanced REP MOVSB (ERMS) version of CopyRow for POSIX, and use CPU detection for ERMS
BUG=213 TEST=none Review URL: https://webrtc-codereview.appspot.com/1306008 git-svn-id: http://libyuv.googlecode.com/svn/trunk@658 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
15c7b2fffc
commit
aa7988ff73
@ -61,6 +61,7 @@ extern "C" {
|
|||||||
#define HAS_BGRATOYROW_SSSE3
|
#define HAS_BGRATOYROW_SSSE3
|
||||||
#define HAS_COPYROW_SSE2
|
#define HAS_COPYROW_SSE2
|
||||||
#define HAS_COPYROW_X86
|
#define HAS_COPYROW_X86
|
||||||
|
#define HAS_COPYROW_ERMS
|
||||||
#define HAS_HALFROW_SSE2
|
#define HAS_HALFROW_SSE2
|
||||||
#define HAS_I400TOARGBROW_SSE2
|
#define HAS_I400TOARGBROW_SSE2
|
||||||
#define HAS_I411TOARGBROW_SSSE3
|
#define HAS_I411TOARGBROW_SSSE3
|
||||||
@ -130,7 +131,6 @@ extern "C" {
|
|||||||
// TODO(fbarchard): Port to gcc.
|
// TODO(fbarchard): Port to gcc.
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||||
#define HAS_ARGBCOLORTABLEROW_X86
|
#define HAS_ARGBCOLORTABLEROW_X86
|
||||||
#define HAS_COPYROW_AVX2
|
|
||||||
// Visual C 2012 required for AVX2.
|
// Visual C 2012 required for AVX2.
|
||||||
#if _MSC_VER >= 1700
|
#if _MSC_VER >= 1700
|
||||||
// TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
|
// TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
|
||||||
@ -649,7 +649,7 @@ void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
|||||||
int width);
|
int width);
|
||||||
|
|
||||||
void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
|
void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
|
||||||
void CopyRow_AVX2(const uint8* src, uint8* dst, int count);
|
void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
|
||||||
void CopyRow_X86(const uint8* src, uint8* dst, int count);
|
void CopyRow_X86(const uint8* src, uint8* dst, int count);
|
||||||
void CopyRow_NEON(const uint8* src, uint8* dst, int count);
|
void CopyRow_NEON(const uint8* src, uint8* dst, int count);
|
||||||
void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
|
void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
|
||||||
|
|||||||
@ -308,10 +308,10 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
|
|||||||
CopyRow = CopyRow_SSE2;
|
CopyRow = CopyRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX2)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
// TODO(fbarchard): Detect Fast String support.
|
// TODO(fbarchard): Detect Fast String support.
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
CopyRow = CopyRow_AVX2;
|
CopyRow = CopyRow_ERMS;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_NEON)
|
#if defined(HAS_COPYROW_NEON)
|
||||||
@ -539,9 +539,9 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
|
|||||||
CopyRow = CopyRow_SSE2;
|
CopyRow = CopyRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX2)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
CopyRow = CopyRow_AVX2;
|
CopyRow = CopyRow_ERMS;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_MIPS)
|
#if defined(HAS_COPYROW_MIPS)
|
||||||
|
|||||||
@ -64,9 +64,9 @@ int I420ToI422(const uint8* src_y, int src_stride_y,
|
|||||||
CopyRow = CopyRow_SSE2;
|
CopyRow = CopyRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX2)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
CopyRow = CopyRow_AVX2;
|
CopyRow = CopyRow_ERMS;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_NEON)
|
#if defined(HAS_COPYROW_NEON)
|
||||||
|
|||||||
@ -49,10 +49,9 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
|
|||||||
CopyRow = CopyRow_SSE2;
|
CopyRow = CopyRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX2)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
// TODO(fbarchard): Detect Fast String support.
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
CopyRow = CopyRow_ERMS;
|
||||||
CopyRow = CopyRow_AVX2;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_NEON)
|
#if defined(HAS_COPYROW_NEON)
|
||||||
|
|||||||
@ -911,10 +911,9 @@ void RotatePlane180(const uint8* src, int src_stride,
|
|||||||
CopyRow = CopyRow_SSE2;
|
CopyRow = CopyRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX2)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
// TODO(fbarchard): Detect Fast String support.
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
CopyRow = CopyRow_ERMS;
|
||||||
CopyRow = CopyRow_AVX2;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_MIPS)
|
#if defined(HAS_COPYROW_MIPS)
|
||||||
|
|||||||
@ -128,10 +128,9 @@ void ARGBRotate180(const uint8* src, int src_stride,
|
|||||||
CopyRow = CopyRow_SSE2;
|
CopyRow = CopyRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX2)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
// TODO(fbarchard): Detect Fast String support.
|
if (TestCpuFlag(kCpuHasERMS)) {
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
CopyRow = CopyRow_ERMS;
|
||||||
CopyRow = CopyRow_AVX2;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_MIPS)
|
#if defined(HAS_COPYROW_MIPS)
|
||||||
|
|||||||
@ -3027,6 +3027,19 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_COPYROW_X86
|
#endif // HAS_COPYROW_X86
|
||||||
|
|
||||||
|
// Copies `width` bytes from `src` to `dst` using a single `rep movsb`.
// Unaligned Multiple of 1.
// The call sites shown in this change install it as CopyRow only when
// TestCpuFlag(kCpuHasERMS) is set.
// NOTE(review): `width` is converted to size_t without a sign check, so a
// negative width would turn into a very large byte count - presumably callers
// always pass width >= 0; confirm.
|
||||||
|
void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
|
||||||
|
// Byte count for the asm below, held in a size_t; presumably widened so the
// operand matches the full count register on 64-bit targets - TODO confirm.
size_t width_tmp = static_cast<size_t>(width);
|
||||||
|
asm volatile (
|
||||||
|
// One string-copy instruction; its three implicit operands are supplied
// through the register constraints below.
"rep movsb \n"
|
||||||
|
: "+S"(src), // %0 - source pointer, SI register, read-write
|
||||||
|
"+D"(dst), // %1 - destination pointer, DI register, read-write
|
||||||
|
"+c"(width_tmp) // %2 - byte count, C register, read-write
|
||||||
|
// No input-only operands: everything is passed as a read-write output.
:
|
||||||
|
// Clobbers: "memory" because the asm stores through dst; "cc" is declared
// as clobbered as well.
: "memory", "cc"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef HAS_SETROW_X86
|
#ifdef HAS_SETROW_X86
|
||||||
void SetRow_X86(uint8* dst, uint32 v32, int width) {
|
void SetRow_X86(uint8* dst, uint32 v32, int width) {
|
||||||
size_t width_tmp = static_cast<size_t>(width);
|
size_t width_tmp = static_cast<size_t>(width);
|
||||||
|
|||||||
@ -3497,10 +3497,9 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_COPYROW_SSE2
|
#endif // HAS_COPYROW_SSE2
|
||||||
|
|
||||||
#ifdef HAS_COPYROW_AVX2
|
|
||||||
// Unaligned Multiple of 1.
|
// Unaligned Multiple of 1.
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void CopyRow_AVX2(const uint8* src, uint8* dst, int count) {
|
void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, esi
|
mov eax, esi
|
||||||
mov edx, edi
|
mov edx, edi
|
||||||
@ -3513,7 +3512,6 @@ void CopyRow_AVX2(const uint8* src, uint8* dst, int count) {
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // HAS_COPYROW_AVX2
|
|
||||||
|
|
||||||
#ifdef HAS_COPYROW_X86
|
#ifdef HAS_COPYROW_X86
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user