mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Disable bilinear 16 bit scale up for SSE2
- Undefine HAS_SCALEROWUP2_BILINEAR_16_SSE2
- Save XMM7 in ScaleRowUp2_Bilinear_16_SSE2().
- Rename HAS_SCALEROWUP2LINEAR_xxx to HAS_SCALEROWUP2_LINEAR_xxx
- DetileSplitUVRow_C() is implemented using SplitUVRow_C().
- Changes to unit_test/planar_test.cc.

Bug: libyuv:882
Change-Id: I0a8e8e5fb43bdf58ded87244e802343eacb789f2
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3795063
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
fe8c78b61a
commit
b028453ba6
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1833
|
Version: 1834
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -76,18 +76,19 @@ extern "C" {
|
|||||||
// TODO(fbarchard): Port to Visual C
|
// TODO(fbarchard): Port to Visual C
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||||
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
|
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
|
||||||
#define HAS_SCALEROWUP2LINEAR_SSE2
|
#define HAS_SCALEROWUP2_LINEAR_SSE2
|
||||||
#define HAS_SCALEROWUP2LINEAR_SSSE3
|
#define HAS_SCALEROWUP2_LINEAR_SSSE3
|
||||||
#define HAS_SCALEROWUP2BILINEAR_SSE2
|
#define HAS_SCALEROWUP2_BILINEAR_SSE2
|
||||||
#define HAS_SCALEROWUP2BILINEAR_SSSE3
|
#define HAS_SCALEROWUP2_BILINEAR_SSSE3
|
||||||
#define HAS_SCALEROWUP2LINEAR_12_SSSE3
|
#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
|
||||||
#define HAS_SCALEROWUP2BILINEAR_12_SSSE3
|
#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
|
||||||
#define HAS_SCALEROWUP2LINEAR_16_SSE2
|
#define HAS_SCALEROWUP2_LINEAR_16_SSE2
|
||||||
#define HAS_SCALEROWUP2BILINEAR_16_SSE2
|
// TODO(libyuv:882): Fix SSE2 version of BILINEAR
|
||||||
#define HAS_SCALEUVROWUP2LINEAR_SSSE3
|
//#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
|
||||||
#define HAS_SCALEUVROWUP2BILINEAR_SSSE3
|
#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
|
||||||
#define HAS_SCALEUVROWUP2LINEAR_16_SSE41
|
#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
|
||||||
#define HAS_SCALEUVROWUP2BILINEAR_16_SSE41
|
#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
|
||||||
|
#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available for gcc/clang x86 platforms, but
|
// The following are available for gcc/clang x86 platforms, but
|
||||||
@ -97,16 +98,16 @@ extern "C" {
|
|||||||
(defined(__x86_64__) || defined(__i386__)) && \
|
(defined(__x86_64__) || defined(__i386__)) && \
|
||||||
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||||
#define HAS_SCALEUVROWDOWN2BOX_AVX2
|
#define HAS_SCALEUVROWDOWN2BOX_AVX2
|
||||||
#define HAS_SCALEROWUP2LINEAR_AVX2
|
#define HAS_SCALEROWUP2_LINEAR_AVX2
|
||||||
#define HAS_SCALEROWUP2BILINEAR_AVX2
|
#define HAS_SCALEROWUP2_BILINEAR_AVX2
|
||||||
#define HAS_SCALEROWUP2LINEAR_12_AVX2
|
#define HAS_SCALEROWUP2_LINEAR_12_AVX2
|
||||||
#define HAS_SCALEROWUP2BILINEAR_12_AVX2
|
#define HAS_SCALEROWUP2_BILINEAR_12_AVX2
|
||||||
#define HAS_SCALEROWUP2LINEAR_16_AVX2
|
#define HAS_SCALEROWUP2_LINEAR_16_AVX2
|
||||||
#define HAS_SCALEROWUP2BILINEAR_16_AVX2
|
#define HAS_SCALEROWUP2_BILINEAR_16_AVX2
|
||||||
#define HAS_SCALEUVROWUP2LINEAR_AVX2
|
#define HAS_SCALEUVROWUP2_LINEAR_AVX2
|
||||||
#define HAS_SCALEUVROWUP2BILINEAR_AVX2
|
#define HAS_SCALEUVROWUP2_BILINEAR_AVX2
|
||||||
#define HAS_SCALEUVROWUP2LINEAR_16_AVX2
|
#define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
|
||||||
#define HAS_SCALEUVROWUP2BILINEAR_16_AVX2
|
#define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on all x86 platforms, but
|
// The following are available on all x86 platforms, but
|
||||||
@ -135,16 +136,16 @@ extern "C" {
|
|||||||
#define HAS_SCALEROWDOWN4_NEON
|
#define HAS_SCALEROWDOWN4_NEON
|
||||||
#define HAS_SCALEUVROWDOWN2BOX_NEON
|
#define HAS_SCALEUVROWDOWN2BOX_NEON
|
||||||
#define HAS_SCALEUVROWDOWNEVEN_NEON
|
#define HAS_SCALEUVROWDOWNEVEN_NEON
|
||||||
#define HAS_SCALEROWUP2LINEAR_NEON
|
#define HAS_SCALEROWUP2_LINEAR_NEON
|
||||||
#define HAS_SCALEROWUP2BILINEAR_NEON
|
#define HAS_SCALEROWUP2_BILINEAR_NEON
|
||||||
#define HAS_SCALEROWUP2LINEAR_12_NEON
|
#define HAS_SCALEROWUP2_LINEAR_12_NEON
|
||||||
#define HAS_SCALEROWUP2BILINEAR_12_NEON
|
#define HAS_SCALEROWUP2_BILINEAR_12_NEON
|
||||||
#define HAS_SCALEROWUP2LINEAR_16_NEON
|
#define HAS_SCALEROWUP2_LINEAR_16_NEON
|
||||||
#define HAS_SCALEROWUP2BILINEAR_16_NEON
|
#define HAS_SCALEROWUP2_BILINEAR_16_NEON
|
||||||
#define HAS_SCALEUVROWUP2LINEAR_NEON
|
#define HAS_SCALEUVROWUP2_LINEAR_NEON
|
||||||
#define HAS_SCALEUVROWUP2BILINEAR_NEON
|
#define HAS_SCALEUVROWUP2_BILINEAR_NEON
|
||||||
#define HAS_SCALEUVROWUP2LINEAR_16_NEON
|
#define HAS_SCALEUVROWUP2_LINEAR_16_NEON
|
||||||
#define HAS_SCALEUVROWUP2BILINEAR_16_NEON
|
#define HAS_SCALEUVROWUP2_BILINEAR_16_NEON
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1833
|
#define LIBYUV_VERSION 1834
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -5454,25 +5454,25 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -5582,22 +5582,22 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -5668,19 +5668,19 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -5768,17 +5768,17 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -5848,19 +5848,19 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -5947,17 +5947,17 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6091,25 +6091,25 @@ static int I420AlphaToARGBMatrixBilinear(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6277,22 +6277,22 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6408,19 +6408,19 @@ static int I010AlphaToARGBMatrixBilinear(
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6565,17 +6565,17 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
|
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6647,19 +6647,19 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
|
||||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||||
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
|
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
|
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
|
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6737,19 +6737,19 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
|
||||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||||
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
|
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
|
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
|
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6813,19 +6813,19 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
|
||||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||||
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
|
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
|
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
|
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -6903,19 +6903,19 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
|
||||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||||
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
|
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
|
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
|
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2663,45 +2663,6 @@ void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DetileRow_C(const uint8_t* src,
|
|
||||||
ptrdiff_t src_tile_stride,
|
|
||||||
uint8_t* dst,
|
|
||||||
int width) {
|
|
||||||
int x;
|
|
||||||
for (x = 0; x < width - 15; x += 16) {
|
|
||||||
memcpy(dst, src, 16);
|
|
||||||
dst += 16;
|
|
||||||
src += src_tile_stride;
|
|
||||||
}
|
|
||||||
if (width & 15) {
|
|
||||||
memcpy(dst, src, width & 15);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DetileSplitUVRow_C(const uint8_t* src_uv,
|
|
||||||
ptrdiff_t src_tile_stride,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width) {
|
|
||||||
int tile;
|
|
||||||
for (tile = 0; tile < width / 16; tile++) {
|
|
||||||
for (int x = 0; x < 8; x++) {
|
|
||||||
*dst_u++ = src_uv[0];
|
|
||||||
*dst_v++ = src_uv[1];
|
|
||||||
src_uv += 2;
|
|
||||||
}
|
|
||||||
src_uv += src_tile_stride - 16;
|
|
||||||
}
|
|
||||||
for (int x = 0; x < (width & 0xF) / 2; ++x) {
|
|
||||||
*dst_u = *src_uv;
|
|
||||||
dst_u++;
|
|
||||||
src_uv++;
|
|
||||||
*dst_v = *src_uv;
|
|
||||||
dst_v++;
|
|
||||||
src_uv++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void SplitUVRow_C(const uint8_t* src_uv,
|
void SplitUVRow_C(const uint8_t* src_uv,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -2738,6 +2699,38 @@ void MergeUVRow_C(const uint8_t* src_u,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DetileRow_C(const uint8_t* src,
|
||||||
|
ptrdiff_t src_tile_stride,
|
||||||
|
uint8_t* dst,
|
||||||
|
int width) {
|
||||||
|
int x;
|
||||||
|
for (x = 0; x < width - 15; x += 16) {
|
||||||
|
memcpy(dst, src, 16);
|
||||||
|
dst += 16;
|
||||||
|
src += src_tile_stride;
|
||||||
|
}
|
||||||
|
if (width & 15) {
|
||||||
|
memcpy(dst, src, width & 15);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DetileSplitUVRow_C(const uint8_t* src_uv,
|
||||||
|
ptrdiff_t src_tile_stride,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
|
int x;
|
||||||
|
for (x = 0; x < width - 15; x += 16) {
|
||||||
|
SplitUVRow_C(src_uv, dst_u, dst_v, 8);
|
||||||
|
dst_u += 8;
|
||||||
|
dst_v += 8;
|
||||||
|
src_uv += src_tile_stride;
|
||||||
|
}
|
||||||
|
if (width & 15) {
|
||||||
|
SplitUVRow_C(src_uv, dst_u, dst_v, ((width & 15) + 1) / 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SplitRGBRow_C(const uint8_t* src_rgb,
|
void SplitRGBRow_C(const uint8_t* src_rgb,
|
||||||
uint8_t* dst_r,
|
uint8_t* dst_r,
|
||||||
uint8_t* dst_g,
|
uint8_t* dst_g,
|
||||||
|
|||||||
@ -1384,25 +1384,25 @@ void ScalePlaneUp2_Linear(int src_width,
|
|||||||
// This function can only scale up by 2 times horizontally.
|
// This function can only scale up by 2 times horizontally.
|
||||||
assert(src_width == ((dst_width + 1) / 2));
|
assert(src_width == ((dst_width + 1) / 2));
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
|
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
|
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_NEON
|
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -1443,25 +1443,25 @@ void ScalePlaneUp2_Bilinear(int src_width,
|
|||||||
assert(src_width == ((dst_width + 1) / 2));
|
assert(src_width == ((dst_width + 1) / 2));
|
||||||
assert(src_height == ((dst_height + 1) / 2));
|
assert(src_height == ((dst_height + 1) / 2));
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
|
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_NEON
|
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -1503,19 +1503,19 @@ void ScalePlaneUp2_12_Linear(int src_width,
|
|||||||
// This function can only scale up by 2 times horizontally.
|
// This function can only scale up by 2 times horizontally.
|
||||||
assert(src_width == ((dst_width + 1) / 2));
|
assert(src_width == ((dst_width + 1) / 2));
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -1557,19 +1557,19 @@ void ScalePlaneUp2_12_Bilinear(int src_width,
|
|||||||
assert(src_width == ((dst_width + 1) / 2));
|
assert(src_width == ((dst_width + 1) / 2));
|
||||||
assert(src_height == ((dst_height + 1) / 2));
|
assert(src_height == ((dst_height + 1) / 2));
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -1604,19 +1604,19 @@ void ScalePlaneUp2_16_Linear(int src_width,
|
|||||||
// This function can only scale up by 2 times horizontally.
|
// This function can only scale up by 2 times horizontally.
|
||||||
assert(src_width == ((dst_width + 1) / 2));
|
assert(src_width == ((dst_width + 1) / 2));
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
|
ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
|
ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
|
ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -1653,19 +1653,19 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
|
|||||||
assert(src_width == ((dst_width + 1) / 2));
|
assert(src_width == ((dst_width + 1) / 2));
|
||||||
assert(src_height == ((dst_height + 1) / 2));
|
assert(src_height == ((dst_height + 1) / 2));
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
|
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
|
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
|
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
|
||||||
}
|
}
|
||||||
@ -1758,7 +1758,6 @@ void ScalePlaneBilinearUp_16(int src_width,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (y > max_y) {
|
if (y > max_y) {
|
||||||
y = max_y;
|
y = max_y;
|
||||||
}
|
}
|
||||||
@ -1915,7 +1914,6 @@ void ScalePlane(const uint8_t* src,
|
|||||||
src = src + (src_height - 1) * (int64_t)src_stride;
|
src = src + (src_height - 1) * (int64_t)src_stride;
|
||||||
src_stride = -src_stride;
|
src_stride = -src_stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use specialized scales to improve performance for common resolutions.
|
// Use specialized scales to improve performance for common resolutions.
|
||||||
// For example, all the 1/2 scalings will use ScalePlaneDown2()
|
// For example, all the 1/2 scalings will use ScalePlaneDown2()
|
||||||
if (dst_width == src_width && dst_height == src_height) {
|
if (dst_width == src_width && dst_height == src_height) {
|
||||||
@ -2018,7 +2016,6 @@ void ScalePlane_16(const uint16_t* src,
|
|||||||
src = src + (src_height - 1) * (int64_t)src_stride;
|
src = src + (src_height - 1) * (int64_t)src_stride;
|
||||||
src_stride = -src_stride;
|
src_stride = -src_stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use specialized scales to improve performance for common resolutions.
|
// Use specialized scales to improve performance for common resolutions.
|
||||||
// For example, all the 1/2 scalings will use ScalePlaneDown2()
|
// For example, all the 1/2 scalings will use ScalePlaneDown2()
|
||||||
if (dst_width == src_width && dst_height == src_height) {
|
if (dst_width == src_width && dst_height == src_height) {
|
||||||
|
|||||||
@ -676,7 +676,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_C,
|
|||||||
0,
|
0,
|
||||||
uint16_t)
|
uint16_t)
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
|
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
|
||||||
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
|
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
|
||||||
ScaleRowUp2_Linear_SSE2,
|
ScaleRowUp2_Linear_SSE2,
|
||||||
ScaleRowUp2_Linear_C,
|
ScaleRowUp2_Linear_C,
|
||||||
@ -684,7 +684,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
|
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
|
||||||
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
|
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
|
||||||
ScaleRowUp2_Linear_SSSE3,
|
ScaleRowUp2_Linear_SSSE3,
|
||||||
ScaleRowUp2_Linear_C,
|
ScaleRowUp2_Linear_C,
|
||||||
@ -692,7 +692,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
|
||||||
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
|
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
|
||||||
ScaleRowUp2_Linear_12_SSSE3,
|
ScaleRowUp2_Linear_12_SSSE3,
|
||||||
ScaleRowUp2_Linear_16_C,
|
ScaleRowUp2_Linear_16_C,
|
||||||
@ -700,7 +700,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
|
||||||
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
|
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
|
||||||
ScaleRowUp2_Linear_16_SSE2,
|
ScaleRowUp2_Linear_16_SSE2,
|
||||||
ScaleRowUp2_Linear_16_C,
|
ScaleRowUp2_Linear_16_C,
|
||||||
@ -708,7 +708,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
|
||||||
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
|
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
|
||||||
ScaleRowUp2_Linear_AVX2,
|
ScaleRowUp2_Linear_AVX2,
|
||||||
ScaleRowUp2_Linear_C,
|
ScaleRowUp2_Linear_C,
|
||||||
@ -716,7 +716,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
|
||||||
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
|
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
|
||||||
ScaleRowUp2_Linear_12_AVX2,
|
ScaleRowUp2_Linear_12_AVX2,
|
||||||
ScaleRowUp2_Linear_16_C,
|
ScaleRowUp2_Linear_16_C,
|
||||||
@ -724,7 +724,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
|
||||||
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
|
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
|
||||||
ScaleRowUp2_Linear_16_AVX2,
|
ScaleRowUp2_Linear_16_AVX2,
|
||||||
ScaleRowUp2_Linear_16_C,
|
ScaleRowUp2_Linear_16_C,
|
||||||
@ -732,7 +732,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_NEON
|
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
|
||||||
SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
|
SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
|
||||||
ScaleRowUp2_Linear_NEON,
|
ScaleRowUp2_Linear_NEON,
|
||||||
ScaleRowUp2_Linear_C,
|
ScaleRowUp2_Linear_C,
|
||||||
@ -740,7 +740,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
|
||||||
SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
|
SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
|
||||||
ScaleRowUp2_Linear_12_NEON,
|
ScaleRowUp2_Linear_12_NEON,
|
||||||
ScaleRowUp2_Linear_16_C,
|
ScaleRowUp2_Linear_16_C,
|
||||||
@ -748,7 +748,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
|
||||||
SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON,
|
SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON,
|
||||||
ScaleRowUp2_Linear_16_NEON,
|
ScaleRowUp2_Linear_16_NEON,
|
||||||
ScaleRowUp2_Linear_16_C,
|
ScaleRowUp2_Linear_16_C,
|
||||||
@ -796,7 +796,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C,
|
|||||||
0,
|
0,
|
||||||
uint16_t)
|
uint16_t)
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
|
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
|
||||||
ScaleRowUp2_Bilinear_SSE2,
|
ScaleRowUp2_Bilinear_SSE2,
|
||||||
ScaleRowUp2_Bilinear_C,
|
ScaleRowUp2_Bilinear_C,
|
||||||
@ -804,7 +804,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
|
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
|
||||||
ScaleRowUp2_Bilinear_12_SSSE3,
|
ScaleRowUp2_Bilinear_12_SSSE3,
|
||||||
ScaleRowUp2_Bilinear_16_C,
|
ScaleRowUp2_Bilinear_16_C,
|
||||||
@ -812,7 +812,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
|
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
|
||||||
ScaleRowUp2_Bilinear_16_SSE2,
|
ScaleRowUp2_Bilinear_16_SSE2,
|
||||||
ScaleRowUp2_Bilinear_16_C,
|
ScaleRowUp2_Bilinear_16_C,
|
||||||
@ -820,7 +820,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
|
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
|
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
|
||||||
ScaleRowUp2_Bilinear_SSSE3,
|
ScaleRowUp2_Bilinear_SSSE3,
|
||||||
ScaleRowUp2_Bilinear_C,
|
ScaleRowUp2_Bilinear_C,
|
||||||
@ -828,7 +828,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
|
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
|
||||||
ScaleRowUp2_Bilinear_AVX2,
|
ScaleRowUp2_Bilinear_AVX2,
|
||||||
ScaleRowUp2_Bilinear_C,
|
ScaleRowUp2_Bilinear_C,
|
||||||
@ -836,7 +836,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
|
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
|
||||||
ScaleRowUp2_Bilinear_12_AVX2,
|
ScaleRowUp2_Bilinear_12_AVX2,
|
||||||
ScaleRowUp2_Bilinear_16_C,
|
ScaleRowUp2_Bilinear_16_C,
|
||||||
@ -844,7 +844,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
|
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
|
||||||
ScaleRowUp2_Bilinear_16_AVX2,
|
ScaleRowUp2_Bilinear_16_AVX2,
|
||||||
ScaleRowUp2_Bilinear_16_C,
|
ScaleRowUp2_Bilinear_16_C,
|
||||||
@ -852,7 +852,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_NEON
|
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
|
SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
|
||||||
ScaleRowUp2_Bilinear_NEON,
|
ScaleRowUp2_Bilinear_NEON,
|
||||||
ScaleRowUp2_Bilinear_C,
|
ScaleRowUp2_Bilinear_C,
|
||||||
@ -860,7 +860,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
|
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
|
||||||
ScaleRowUp2_Bilinear_12_NEON,
|
ScaleRowUp2_Bilinear_12_NEON,
|
||||||
ScaleRowUp2_Bilinear_16_C,
|
ScaleRowUp2_Bilinear_16_C,
|
||||||
@ -868,7 +868,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
|
||||||
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON,
|
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON,
|
||||||
ScaleRowUp2_Bilinear_16_NEON,
|
ScaleRowUp2_Bilinear_16_NEON,
|
||||||
ScaleRowUp2_Bilinear_16_C,
|
ScaleRowUp2_Bilinear_16_C,
|
||||||
@ -908,7 +908,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C,
|
|||||||
0,
|
0,
|
||||||
uint16_t)
|
uint16_t)
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
|
||||||
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
|
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
|
||||||
ScaleUVRowUp2_Linear_SSSE3,
|
ScaleUVRowUp2_Linear_SSSE3,
|
||||||
ScaleUVRowUp2_Linear_C,
|
ScaleUVRowUp2_Linear_C,
|
||||||
@ -916,7 +916,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
|
||||||
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
|
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
|
||||||
ScaleUVRowUp2_Linear_AVX2,
|
ScaleUVRowUp2_Linear_AVX2,
|
||||||
ScaleUVRowUp2_Linear_C,
|
ScaleUVRowUp2_Linear_C,
|
||||||
@ -924,7 +924,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
|
||||||
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
|
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
|
||||||
ScaleUVRowUp2_Linear_16_SSE41,
|
ScaleUVRowUp2_Linear_16_SSE41,
|
||||||
ScaleUVRowUp2_Linear_16_C,
|
ScaleUVRowUp2_Linear_16_C,
|
||||||
@ -932,7 +932,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
|
||||||
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
|
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
|
||||||
ScaleUVRowUp2_Linear_16_AVX2,
|
ScaleUVRowUp2_Linear_16_AVX2,
|
||||||
ScaleUVRowUp2_Linear_16_C,
|
ScaleUVRowUp2_Linear_16_C,
|
||||||
@ -940,7 +940,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_NEON
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_NEON
|
||||||
SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
|
SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
|
||||||
ScaleUVRowUp2_Linear_NEON,
|
ScaleUVRowUp2_Linear_NEON,
|
||||||
ScaleUVRowUp2_Linear_C,
|
ScaleUVRowUp2_Linear_C,
|
||||||
@ -948,7 +948,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
|
||||||
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON,
|
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON,
|
||||||
ScaleUVRowUp2_Linear_16_NEON,
|
ScaleUVRowUp2_Linear_16_NEON,
|
||||||
ScaleUVRowUp2_Linear_16_C,
|
ScaleUVRowUp2_Linear_16_C,
|
||||||
@ -1006,7 +1006,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C,
|
|||||||
0,
|
0,
|
||||||
uint16_t)
|
uint16_t)
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
|
||||||
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
|
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
|
||||||
ScaleUVRowUp2_Bilinear_SSSE3,
|
ScaleUVRowUp2_Bilinear_SSSE3,
|
||||||
ScaleUVRowUp2_Bilinear_C,
|
ScaleUVRowUp2_Bilinear_C,
|
||||||
@ -1014,7 +1014,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
|
||||||
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
|
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
|
||||||
ScaleUVRowUp2_Bilinear_AVX2,
|
ScaleUVRowUp2_Bilinear_AVX2,
|
||||||
ScaleUVRowUp2_Bilinear_C,
|
ScaleUVRowUp2_Bilinear_C,
|
||||||
@ -1022,7 +1022,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
|
||||||
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
|
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
|
||||||
ScaleUVRowUp2_Bilinear_16_SSE41,
|
ScaleUVRowUp2_Bilinear_16_SSE41,
|
||||||
ScaleUVRowUp2_Bilinear_16_C,
|
ScaleUVRowUp2_Bilinear_16_C,
|
||||||
@ -1030,7 +1030,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
|
||||||
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
|
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
|
||||||
ScaleUVRowUp2_Bilinear_16_AVX2,
|
ScaleUVRowUp2_Bilinear_16_AVX2,
|
||||||
ScaleUVRowUp2_Bilinear_16_C,
|
ScaleUVRowUp2_Bilinear_16_C,
|
||||||
@ -1038,7 +1038,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
|
|||||||
uint16_t)
|
uint16_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON
|
||||||
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
|
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
|
||||||
ScaleUVRowUp2_Bilinear_NEON,
|
ScaleUVRowUp2_Bilinear_NEON,
|
||||||
ScaleUVRowUp2_Bilinear_C,
|
ScaleUVRowUp2_Bilinear_C,
|
||||||
@ -1046,7 +1046,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
|
|||||||
uint8_t)
|
uint8_t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
|
||||||
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON,
|
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON,
|
||||||
ScaleUVRowUp2_Bilinear_16_NEON,
|
ScaleUVRowUp2_Bilinear_16_NEON,
|
||||||
ScaleUVRowUp2_Bilinear_16_C,
|
ScaleUVRowUp2_Bilinear_16_C,
|
||||||
|
|||||||
@ -779,7 +779,7 @@ static const uvec8 kLinearShuffleFar = {2, 3, 0, 1, 6, 7, 4, 5,
|
|||||||
static const uvec8 kLinearMadd31 = {3, 1, 1, 3, 3, 1, 1, 3,
|
static const uvec8 kLinearMadd31 = {3, 1, 1, 3, 3, 1, 1, 3,
|
||||||
3, 1, 1, 3, 3, 1, 1, 3};
|
3, 1, 1, 3, 3, 1, 1, 3};
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_SSE2
|
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
|
||||||
void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
|
void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -833,7 +833,7 @@ void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
|
||||||
void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
|
void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -949,7 +949,7 @@ void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
|
||||||
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
|
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -999,7 +999,7 @@ void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
|
||||||
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
|
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
@ -1098,7 +1098,7 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
|
||||||
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
|
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -1149,7 +1149,7 @@ void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
|
||||||
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
|
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
@ -1254,11 +1254,12 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
|
|||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)), // %3
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
"r"((intptr_t)(dst_stride)) // %4
|
"r"((intptr_t)(dst_stride)) // %4
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
|
"xmm7");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
|
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
|
||||||
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -1297,7 +1298,7 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
|
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
|
||||||
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -1384,7 +1385,7 @@ void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
|
||||||
void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
|
void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -1426,7 +1427,7 @@ void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
|
||||||
void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
|
void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -1510,7 +1511,7 @@ void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
|
||||||
void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
|
void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -1560,7 +1561,7 @@ void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
|
||||||
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
|
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
@ -1624,7 +1625,7 @@ void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
|
||||||
void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
|
void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -1672,7 +1673,7 @@ void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
|
||||||
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
|
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
@ -2325,13 +2326,18 @@ int FixedDiv1_X86(int num, int div) {
|
|||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
|
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) || \
|
||||||
|
defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
|
||||||
|
|
||||||
// Shuffle table for splitting UV into upper and lower part of register.
|
// Shuffle table for splitting UV into upper and lower part of register.
|
||||||
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
|
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
|
||||||
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
|
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
|
||||||
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u,
|
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u,
|
||||||
6u, 14u, 0x80, 0x80, 0x80, 0x80,
|
6u, 14u, 0x80, 0x80, 0x80, 0x80,
|
||||||
0x80, 0x80, 0x80, 0x80};
|
0x80, 0x80, 0x80, 0x80};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
|
||||||
|
|
||||||
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
@ -2417,7 +2423,7 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
|
|||||||
static const uvec8 kUVLinearMadd31 = {3, 1, 3, 1, 1, 3, 1, 3,
|
static const uvec8 kUVLinearMadd31 = {3, 1, 3, 1, 1, 3, 1, 3,
|
||||||
3, 1, 3, 1, 1, 3, 1, 3};
|
3, 1, 3, 1, 1, 3, 1, 3};
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
|
||||||
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -2456,7 +2462,7 @@ void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
|
||||||
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -2541,7 +2547,7 @@ void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
|
||||||
|
|
||||||
void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
|
void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -2583,7 +2589,7 @@ void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
|
||||||
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
|
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -2665,7 +2671,7 @@ void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
|
||||||
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
|
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -2715,7 +2721,7 @@ void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
|
||||||
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
|
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
@ -2808,7 +2814,7 @@ void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
|
||||||
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
|
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
@ -2855,7 +2861,7 @@ void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
|
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
|
||||||
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
|
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
|
|||||||
@ -1673,97 +1673,45 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
|
|||||||
free_aligned_buffer_page_end(dst_opt);
|
free_aligned_buffer_page_end(dst_opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
|
// Compares DetileSplitUV to 2 step Detile + SplitUV
|
||||||
|
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
// orig is tiled. Allocate enough memory for tiles.
|
// orig is tiled. Allocate enough memory for tiles.
|
||||||
int orig_width = (benchmark_width_ + 15) & ~15;
|
int orig_width = (benchmark_width_ + 15) & ~15;
|
||||||
int orig_height = (benchmark_height_ + 15) & ~15;
|
int orig_height = (benchmark_height_ + 15) & ~15;
|
||||||
int orig_plane_size = orig_width * orig_height;
|
int orig_plane_size = orig_width * orig_height;
|
||||||
int u_plane_size = benchmark_width_ * benchmark_height_;
|
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
|
||||||
int v_plane_size = u_plane_size;
|
|
||||||
align_buffer_page_end(orig_uv, orig_plane_size);
|
|
||||||
align_buffer_page_end(dst_u_c, u_plane_size);
|
|
||||||
align_buffer_page_end(dst_u_opt, u_plane_size);
|
|
||||||
align_buffer_page_end(dst_v_c, v_plane_size);
|
|
||||||
align_buffer_page_end(dst_v_opt, v_plane_size);
|
|
||||||
|
|
||||||
MemRandomize(orig_uv, orig_plane_size);
|
|
||||||
memset(dst_u_c, 0, u_plane_size);
|
|
||||||
memset(dst_u_opt, 0, u_plane_size);
|
|
||||||
memset(dst_v_c, 0, v_plane_size);
|
|
||||||
memset(dst_v_opt, 0, v_plane_size);
|
|
||||||
|
|
||||||
// Disable all optimizations.
|
|
||||||
MaskCpuFlags(disable_cpu_flags_);
|
|
||||||
for (j = 0; j < benchmark_iterations_; j++) {
|
|
||||||
DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
|
|
||||||
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
|
|
||||||
benchmark_height_, 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enable optimizations.
|
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
|
||||||
for (j = 0; j < benchmark_iterations_; j++) {
|
|
||||||
DetileSplitUVPlane(
|
|
||||||
orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
|
|
||||||
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < u_plane_size; ++i) {
|
|
||||||
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
|
|
||||||
}
|
|
||||||
for (i = 0; i < v_plane_size; ++i) {
|
|
||||||
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
free_aligned_buffer_page_end(orig_uv);
|
|
||||||
free_aligned_buffer_page_end(dst_u_c);
|
|
||||||
free_aligned_buffer_page_end(dst_u_opt);
|
|
||||||
free_aligned_buffer_page_end(dst_v_c);
|
|
||||||
free_aligned_buffer_page_end(dst_v_opt);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(b/228518489): Fix Segmentation fault in this test
|
|
||||||
TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) {
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
// orig is tiled. Allocate enough memory for tiles.
|
|
||||||
int orig_width = (benchmark_width_ + 15) & ~15;
|
|
||||||
int orig_height = (benchmark_height_ + 15) & ~15;
|
|
||||||
int orig_plane_size = orig_width * orig_height;
|
|
||||||
int u_plane_size = benchmark_width_ * benchmark_height_;
|
|
||||||
int v_plane_size = u_plane_size;
|
|
||||||
align_buffer_page_end(orig_uv, orig_plane_size);
|
align_buffer_page_end(orig_uv, orig_plane_size);
|
||||||
align_buffer_page_end(detiled_uv, orig_plane_size);
|
align_buffer_page_end(detiled_uv, orig_plane_size);
|
||||||
align_buffer_page_end(dst_u_two_stage, u_plane_size);
|
align_buffer_page_end(dst_u_two_stage, uv_plane_size);
|
||||||
align_buffer_page_end(dst_u_opt, u_plane_size);
|
align_buffer_page_end(dst_u_opt, uv_plane_size);
|
||||||
align_buffer_page_end(dst_v_two_stage, v_plane_size);
|
align_buffer_page_end(dst_v_two_stage, uv_plane_size);
|
||||||
align_buffer_page_end(dst_v_opt, v_plane_size);
|
align_buffer_page_end(dst_v_opt, uv_plane_size);
|
||||||
|
|
||||||
MemRandomize(orig_uv, orig_plane_size);
|
MemRandomize(orig_uv, orig_plane_size);
|
||||||
memset(detiled_uv, 0, orig_plane_size);
|
memset(detiled_uv, 0, orig_plane_size);
|
||||||
memset(dst_u_two_stage, 0, u_plane_size);
|
memset(dst_u_two_stage, 0, uv_plane_size);
|
||||||
memset(dst_u_opt, 0, u_plane_size);
|
memset(dst_u_opt, 0, uv_plane_size);
|
||||||
memset(dst_v_two_stage, 0, v_plane_size);
|
memset(dst_v_two_stage, 0, uv_plane_size);
|
||||||
memset(dst_v_opt, 0, v_plane_size);
|
memset(dst_v_opt, 0, uv_plane_size);
|
||||||
|
|
||||||
|
DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
|
||||||
|
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
|
||||||
|
benchmark_height_, 16);
|
||||||
|
|
||||||
|
// Benchmark 2 step conversion for comparison.
|
||||||
for (j = 0; j < benchmark_iterations_; j++) {
|
for (j = 0; j < benchmark_iterations_; j++) {
|
||||||
DetileSplitUVPlane(
|
|
||||||
orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
|
|
||||||
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
|
DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
|
||||||
benchmark_width_, benchmark_height_, 16);
|
benchmark_width_, benchmark_height_, 16);
|
||||||
SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
|
SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
|
||||||
(benchmark_width_ + 1) / 2, dst_v_two_stage,
|
(benchmark_width_ + 1) / 2, dst_v_two_stage,
|
||||||
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_);
|
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
|
||||||
|
benchmark_height_);
|
||||||
for (i = 0; i < u_plane_size; ++i) {
|
|
||||||
EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
|
|
||||||
}
|
}
|
||||||
for (i = 0; i < v_plane_size; ++i) {
|
|
||||||
|
for (i = 0; i < uv_plane_size; ++i) {
|
||||||
|
EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
|
||||||
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
|
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1775,6 +1723,54 @@ TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) {
|
|||||||
free_aligned_buffer_page_end(dst_v_opt);
|
free_aligned_buffer_page_end(dst_v_opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs DetileSplitUVPlane once with CPU optimizations masked off and then
// benchmark_iterations_ times with them enabled, and checks that both runs
// produce identical U and V planes.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  // The source plane is tiled; round both dimensions up to a multiple of 16
  // so the allocation has enough memory for the tiles.
  const int tiled_width = (benchmark_width_ + 15) & ~15;
  const int tiled_height = (benchmark_height_ + 15) & ~15;
  const int tiled_size = tiled_width * tiled_height;

  // Each destination plane is half the source width (rounded up) per row.
  const int half_width = (benchmark_width_ + 1) / 2;
  const int uv_size = half_width * benchmark_height_;

  align_buffer_page_end(tiled_uv, tiled_size);
  align_buffer_page_end(u_plain, uv_size);
  align_buffer_page_end(u_fast, uv_size);
  align_buffer_page_end(v_plain, uv_size);
  align_buffer_page_end(v_fast, uv_size);

  // Random input, zeroed outputs.
  MemRandomize(tiled_uv, tiled_size);
  memset(u_plain, 0, uv_size);
  memset(u_fast, 0, uv_size);
  memset(v_plain, 0, uv_size);
  memset(v_fast, 0, uv_size);

  // Reference pass: all optimizations disabled, executed a single time.
  MaskCpuFlags(disable_cpu_flags_);
  DetileSplitUVPlane(tiled_uv, tiled_width, u_plain, half_width, v_plain,
                     half_width, benchmark_width_, benchmark_height_, 16);

  // Optimized pass: optimizations enabled, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetileSplitUVPlane(tiled_uv, tiled_width, u_fast, half_width, v_fast,
                       half_width, benchmark_width_, benchmark_height_, 16);
  }

  // Both passes must agree byte for byte on U and on V.
  for (int idx = 0; idx < uv_size; ++idx) {
    EXPECT_EQ(u_plain[idx], u_fast[idx]);
    EXPECT_EQ(v_plain[idx], v_fast[idx]);
  }

  free_aligned_buffer_page_end(tiled_uv);
  free_aligned_buffer_page_end(u_plain);
  free_aligned_buffer_page_end(u_fast);
  free_aligned_buffer_page_end(v_plain);
  free_aligned_buffer_page_end(v_fast);
}
|
||||||
|
|
||||||
static int TestMultiply(int width,
|
static int TestMultiply(int width,
|
||||||
int height,
|
int height,
|
||||||
int benchmark_iterations,
|
int benchmark_iterations,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user