mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-15 22:59:53 +08:00
Mirror_AVX2 ported to GCC.
BUG=269 TESTED=try bots R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/32079004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1164 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
91000425a3
commit
35508d0979
@ -199,6 +199,7 @@ extern "C" {
|
|||||||
#define HAS_UYVYTOUVROW_AVX2
|
#define HAS_UYVYTOUVROW_AVX2
|
||||||
#define HAS_SPLITUVROW_AVX2
|
#define HAS_SPLITUVROW_AVX2
|
||||||
#define HAS_MERGEUVROW_AVX2
|
#define HAS_MERGEUVROW_AVX2
|
||||||
|
#define HAS_MIRRORROW_AVX2
|
||||||
|
|
||||||
// Effects:
|
// Effects:
|
||||||
#define HAS_ARGBADDROW_AVX2
|
#define HAS_ARGBADDROW_AVX2
|
||||||
@ -206,6 +207,7 @@ extern "C" {
|
|||||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
#define HAS_ARGBMULTIPLYROW_AVX2
|
||||||
#define HAS_ARGBATTENUATEROW_AVX2
|
#define HAS_ARGBATTENUATEROW_AVX2
|
||||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||||
|
#define HAS_ARGBMIRRORROW_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following require VS2012.
|
// The following require VS2012.
|
||||||
@ -218,8 +220,6 @@ extern "C" {
|
|||||||
#define HAS_I422TORGBAROW_AVX2
|
#define HAS_I422TORGBAROW_AVX2
|
||||||
#define HAS_I422TOABGRROW_AVX2
|
#define HAS_I422TOABGRROW_AVX2
|
||||||
#define HAS_INTERPOLATEROW_AVX2
|
#define HAS_INTERPOLATEROW_AVX2
|
||||||
#define HAS_MIRRORROW_AVX2
|
|
||||||
#define HAS_ARGBMIRRORROW_AVX2
|
|
||||||
#endif // defined(VISUALC_HAS_AVX2)
|
#endif // defined(VISUALC_HAS_AVX2)
|
||||||
|
|
||||||
// The following are Yasm x86 only:
|
// The following are Yasm x86 only:
|
||||||
|
|||||||
@ -2207,6 +2207,37 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_MIRRORROW_SSSE3
|
#endif // HAS_MIRRORROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
|
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
|
||||||
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
|
asm volatile (
|
||||||
|
"vbroadcastf128 %3,%%ymm5 \n"
|
||||||
|
"lea " MEMLEA(-0x20,0) ",%0 \n"
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
MEMOPREG(vmovdqu,0x00,0,2,1,ymm0) // vmovdqu (%0,%2),%%ymm0
|
||||||
|
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpermq $0x4e,%%ymm0,%%ymm0 \n"
|
||||||
|
"sub $0x20,%2 \n"
|
||||||
|
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||||
|
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src), // %0
|
||||||
|
"+r"(dst), // %1
|
||||||
|
"+r"(temp_width) // %2
|
||||||
|
: "m"(kShuffleMirror) // %3
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__native_client__) && defined(__x86_64__)
|
||||||
|
, "r14"
|
||||||
|
#endif
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_MIRRORROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_MIRRORROW_SSE2
|
#ifdef HAS_MIRRORROW_SSE2
|
||||||
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
|
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
|
|||||||
@ -2398,6 +2398,7 @@ static const uvec8 kShuffleMirror = {
|
|||||||
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
|
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// TODO(fbarchard): Replace lea with -16 offset.
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
|
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
|
||||||
__asm {
|
__asm {
|
||||||
@ -2421,18 +2422,13 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
|
|||||||
#endif // HAS_MIRRORROW_SSSE3
|
#endif // HAS_MIRRORROW_SSSE3
|
||||||
|
|
||||||
#ifdef HAS_MIRRORROW_AVX2
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
// Shuffle table for reversing the bytes.
|
|
||||||
static const uvec8 kShuffleMirror_AVX2 = {
|
|
||||||
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
|
|
||||||
};
|
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
|
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src
|
mov eax, [esp + 4] // src
|
||||||
mov edx, [esp + 8] // dst
|
mov edx, [esp + 8] // dst
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
vbroadcastf128 ymm5, kShuffleMirror_AVX2
|
vbroadcastf128 ymm5, kShuffleMirror
|
||||||
lea eax, [eax - 32]
|
lea eax, [eax - 32]
|
||||||
|
|
||||||
align 4
|
align 4
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user