mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
avx2 scale down by 2 for gcc
R=dhrosa@google.com, harryjin@google.com BUG=libyuv:527 Review URL: https://codereview.chromium.org/1520423003 .
This commit is contained in:
parent
77346fcb4a
commit
70445ef2ef
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1554
|
||||
Version: 1555
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -69,12 +69,12 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
|
||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
#define HAS_SCALEADDROW_AVX2
|
||||
#define HAS_SCALEROWDOWN2_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#define HAS_SCALEROWDOWN2_AVX2
|
||||
#define HAS_SCALEROWDOWN4_AVX2
|
||||
#endif
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1554
|
||||
#define LIBYUV_VERSION 1555
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -188,6 +188,104 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEROWDOWN2_AVX2
|
||||
// Halves a row horizontally by keeping one byte out of every two source bytes:
// each 16-bit source word is shifted right by 8, so the second (odd-indexed)
// byte of every pair is the one that survives.
//
// src_ptr:    source row; 64 bytes are read per loop iteration.
// src_stride: not referenced by this function (only one row is read).
// dst_ptr:    destination row; 32 bytes are written per loop iteration.
// dst_width:  number of output bytes requested; decremented by 32 per pass.
//
// The loop test sits at the bottom, so the body always runs at least once and
// every pass reads 64 / writes 32 bytes in full.
// NOTE(review): presumably callers pass a dst_width that is a positive
// multiple of 32 - confirm against the dispatch code.
// LABELALIGN / MEMACCESS / MEMACCESS2 / MEMLEA are macros defined outside this
// view; they presumably expand to an alignment directive and to plain
// "(%n)" / "disp(%n)" operands - confirm.
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"  // top of the per-32-output-bytes loop
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"  // unaligned load, first 32 source bytes
|
||||
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"  // unaligned load, next 32 source bytes
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"  // src_ptr += 64
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"  // each 16-bit word >>= 8: high byte of the pair remains
|
||||
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"  // narrow words to bytes (packs within each 128-bit lane)
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"  // reorder quadwords 0,2,1,3 to undo the per-lane interleave
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"  // store 32 output bytes
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"  // dst_ptr += 32
|
||||
"sub $0x20,%2 \n"  // dst_width -= 32
|
||||
"jg 1b \n"  // repeat while the remaining count is > 0
|
||||
"vzeroupper \n"  // zero the upper halves of the ymm registers before leaving
|
||||
: "+r"(src_ptr), // %0 - read/write: advanced by the loop
|
||||
"+r"(dst_ptr), // %1 - read/write: advanced by the loop
|
||||
"+r"(dst_width) // %2 - read/write: loop counter
|
||||
:: "memory", "cc", "xmm0", "xmm1"  // ymm0/ymm1 are declared under their xmm names
|
||||
);
|
||||
}
|
||||
|
||||
// Halves a row horizontally by averaging each pair of adjacent source bytes
// with rounding: out = (a + b + 1) >> 1.
//
// src_ptr:    source row; 64 bytes are read per loop iteration.
// src_stride: not referenced by this function (only one row is read).
// dst_ptr:    destination row; 32 bytes are written per loop iteration.
// dst_width:  number of output bytes requested; decremented by 32 per pass.
//
// The loop test sits at the bottom, so the body always runs at least once and
// every pass reads 64 / writes 32 bytes in full.
// NOTE(review): presumably callers pass a dst_width that is a positive
// multiple of 32 - confirm against the dispatch code.
// LABELALIGN / MEMACCESS / MEMACCESS2 / MEMLEA are macros defined outside this
// view; they presumably expand to an alignment directive and to plain
// "(%n)" / "disp(%n)" operands - confirm.
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"  // ymm4 = all bits set
|
||||
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"  // each 16-bit word = 0xffff >> 15 = 1
|
||||
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"  // ymm4 = 0x01 in every byte (multiplier for vpmaddubsw)
|
||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"  // ymm5 = 0 (second operand for vpavgw)
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"  // top of the per-32-output-bytes loop
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"  // unaligned load, first 32 source bytes
|
||||
"vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n"  // unaligned load, next 32 source bytes
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"  // src_ptr += 64
|
||||
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"  // multiply bytes by 1 and add adjacent pairs: word = a + b
|
||||
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpavgw %%ymm5,%%ymm0,%%ymm0 \n"  // average with 0: (a + b + 1) >> 1
|
||||
"vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"  // narrow words to bytes (packs within each 128-bit lane)
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"  // reorder quadwords 0,2,1,3 to undo the per-lane interleave
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"  // store 32 output bytes
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"  // dst_ptr += 32
|
||||
"sub $0x20,%2 \n"  // dst_width -= 32
|
||||
"jg 1b \n"  // repeat while the remaining count is > 0
|
||||
"vzeroupper \n"  // zero the upper halves of the ymm registers before leaving
|
||||
: "+r"(src_ptr), // %0 - read/write: advanced by the loop
|
||||
"+r"(dst_ptr), // %1 - read/write: advanced by the loop
|
||||
"+r"(dst_width) // %2 - read/write: loop counter
|
||||
:: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"  // ymm registers are declared under their xmm names
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
|
||||
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
|
||||
MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
|
||||
MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||
"vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||
"vpsrlw $0x1,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x1,%%ymm1,%%ymm1 \n"
|
||||
"vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
|
||||
"vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width) // %2
|
||||
: "r"((intptr_t)(src_stride)) // %3
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_SCALEROWDOWN2_AVX2
|
||||
|
||||
void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
|
||||
@ -298,18 +298,18 @@ static int TestFilter_16(int src_width, int src_height,
|
||||
|
||||
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
||||
// filtering uses different fixed point implementations for SSSE3, Neon and C.
|
||||
#define TEST_FACTOR(name, nom, denom) \
|
||||
#define TEST_FACTOR(name, nom, denom, maxdiff) \
|
||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, 3)
|
||||
TEST_FACTOR1(name, Linear, nom, denom, maxdiff) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, maxdiff) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, maxdiff)
|
||||
|
||||
TEST_FACTOR(2, 1, 2)
|
||||
TEST_FACTOR(4, 1, 4)
|
||||
TEST_FACTOR(8, 1, 8)
|
||||
TEST_FACTOR(3by4, 3, 4)
|
||||
TEST_FACTOR(3by8, 3, 8)
|
||||
TEST_FACTOR(3, 1, 3)
|
||||
TEST_FACTOR(2, 1, 2, 0)
|
||||
TEST_FACTOR(4, 1, 4, 3)
|
||||
TEST_FACTOR(8, 1, 8, 3)
|
||||
TEST_FACTOR(3by4, 3, 4, 3)
|
||||
TEST_FACTOR(3by8, 3, 8, 3)
|
||||
TEST_FACTOR(3, 1, 3, 3)
|
||||
#undef TEST_FACTOR1
|
||||
#undef TEST_FACTOR
|
||||
#undef SX
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user