mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
fix for InterpolateRow_AVX2
port scaledownby4_avx2 to gcc TBR=harryjin@google.com BUG=libyuv:492 Review URL: https://codereview.chromium.org/1546763002 .
This commit is contained in:
parent
71deb7ba3a
commit
36615d62a0
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1560
|
Version: 1561
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -70,11 +70,6 @@ extern "C" {
|
|||||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||||
#define HAS_SCALEADDROW_AVX2
|
#define HAS_SCALEADDROW_AVX2
|
||||||
#define HAS_SCALEROWDOWN2_AVX2
|
#define HAS_SCALEROWDOWN2_AVX2
|
||||||
#endif
|
|
||||||
|
|
||||||
// The following are available for Visual C and clangcl 32 bit:
|
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
|
||||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
|
||||||
#define HAS_SCALEROWDOWN4_AVX2
|
#define HAS_SCALEROWDOWN4_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1560
|
#define LIBYUV_VERSION 1561
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -368,6 +368,93 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAS_SCALEROWDOWN4_AVX2
|
||||||
|
// Point-samples a row down by 4 using AVX2: each loop iteration reads 64
// bytes from src_ptr, keeps the byte at offset 2 of every 4-byte group, and
// writes the resulting 16 bytes to dst_ptr.
//   src_ptr    - source row; advanced by 0x40 bytes per iteration.
//   src_stride - not referenced by this function.
//   dst_ptr    - destination row; advanced by 0x10 bytes per iteration.
//   dst_width  - remaining output count; reduced by 0x10 per iteration and
//                the loop repeats while the result is greater than zero.
// NOTE(review): the loop body always runs at least once and always stores 16
// bytes, so this presumably expects dst_width to be a positive multiple of
// 16 - confirm against the callers.
// LABELALIGN, MEMACCESS, MEMACCESS2 and MEMLEA are macros defined elsewhere.
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width) {
|
||||||
|
asm volatile (
|
||||||
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"  // ymm5 = all bits set
|
||||||
|
"vpsrld $0x18,%%ymm5,%%ymm5 \n"  // each dword = 0x000000ff
|
||||||
|
"vpslld $0x10,%%ymm5,%%ymm5 \n"  // each dword = 0x00ff0000 (byte 2 mask)
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"  // load first 32 source bytes
|
||||||
|
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"  // load next 32 source bytes
|
||||||
|
"lea " MEMLEA(0x40,0) ",%0 \n"  // src_ptr += 64
|
||||||
|
"vpand %%ymm5,%%ymm0,%%ymm0 \n"  // keep only byte 2 of each dword
|
||||||
|
"vpand %%ymm5,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"  // words -> bytes; kept byte lands in odd byte positions
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"  // reorder 64-bit quarters (0,2,1,3) after the per-lane pack
|
||||||
|
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"  // move the kept byte to the low byte of each word
|
||||||
|
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"  // words -> bytes again
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"  // gather the 16 result bytes into the low 128 bits
|
||||||
|
"vmovdqu %%xmm0," MEMACCESS(1) " \n"  // store 16 output bytes
|
||||||
|
"lea " MEMLEA(0x10,1) ",%1 \n"  // dst_ptr += 16
|
||||||
|
"sub $0x10,%2 \n"  // dst_width -= 16
|
||||||
|
"jg 1b \n"  // loop while dst_width > 0
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src_ptr), // %0
|
||||||
|
"+r"(dst_ptr), // %1
|
||||||
|
"+r"(dst_width) // %2
|
||||||
|
// No input operands; the clobber list follows the empty input section.
:: "memory", "cc", "xmm0", "xmm1", "xmm5"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Box-filters a row down by 4 using AVX2: each loop iteration reads 64 bytes
// from each of four rows (src_ptr, src_ptr + src_stride,
// src_ptr + 2 * src_stride, src_ptr + 3 * src_stride), sums every 4x4 group
// of bytes, adds 8, shifts right by 4, and writes the 16 resulting bytes to
// dst_ptr.
//   src_ptr    - first source row; advanced by 0x40 bytes per iteration.
//   src_stride - byte offset between consecutive source rows.
//   dst_ptr    - destination row; advanced by 0x10 bytes per iteration.
//   dst_width  - remaining output count; reduced by 0x10 per iteration and
//                the loop repeats while the result is greater than zero.
// NOTE(review): the loop body always runs at least once and always stores 16
// bytes, so this presumably expects dst_width to be a positive multiple of
// 16 - confirm against the callers.
// LABELALIGN, MEMACCESS, MEMACCESS2, MEMLEA, MEMOPREG and NACL_R14 are macros
// defined elsewhere; the trailing comments on the MEMOPREG lines show the
// instruction each one is meant to produce.
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width) {
|
||||||
|
asm volatile (
|
||||||
|
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"  // ymm4 = all bits set
|
||||||
|
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"  // each word = 1
|
||||||
|
"vpsllw $0x3,%%ymm4,%%ymm5 \n"  // ymm5: each word = 8 (rounding term)
|
||||||
|
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"  // ymm4: each byte = 1 (multiplier)
|
||||||
|
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"  // row 0, first 32 bytes
|
||||||
|
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"  // row 0, next 32 bytes
|
||||||
|
MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
|
||||||
|
MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"  // sum each adjacent byte pair into a word (multiplier bytes are 1)
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||||
|
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"  // accumulate row 1 into row 0 sums
|
||||||
|
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||||
|
MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2
|
||||||
|
MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||||
|
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"  // accumulate row 2
|
||||||
|
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||||
|
MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2
|
||||||
|
MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
|
||||||
|
"lea " MEMLEA(0x40,0) ",%0 \n"  // src_ptr += 64 (after the last load that uses it)
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||||
|
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||||
|
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"  // accumulate row 3
|
||||||
|
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||||
|
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n"  // add adjacent words: each word is now a 4x4 sum
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"  // reorder 64-bit quarters (0,2,1,3) after the per-lane add
|
||||||
|
"vpaddw %%ymm5,%%ymm0,%%ymm0 \n"  // + 8 for rounding
|
||||||
|
"vpsrlw $0x4,%%ymm0,%%ymm0 \n"  // divide by 16
|
||||||
|
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"  // words -> bytes
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"  // gather the 16 result bytes into the low 128 bits
|
||||||
|
"vmovdqu %%xmm0," MEMACCESS(1) " \n"  // store 16 output bytes
|
||||||
|
"lea " MEMLEA(0x10,1) ",%1 \n"  // dst_ptr += 16
|
||||||
|
"sub $0x10,%2 \n"  // dst_width -= 16
|
||||||
|
"jg 1b \n"  // loop while dst_width > 0
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src_ptr), // %0
|
||||||
|
"+r"(dst_ptr), // %1
|
||||||
|
"+r"(dst_width) // %2
|
||||||
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
|
"r"((intptr_t)(src_stride * 3)) // %4
|
||||||
|
: "memory", "cc", NACL_R14
|
||||||
|
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_SCALEROWDOWN4_AVX2
|
||||||
|
|
||||||
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user