Disable bilinear 16 bit scale up for SSE2

- Undefine HAS_SCALEROWUP2_BILINEAR_16_SSE2
- Save XMM7 in ScaleRowUp2_Bilinear_16_SSE2().
- Rename HAS_SCALEROWUP2LINEAR_xxx to HAS_SCALEROWUP2_LINEAR_xxx
- DetileSplitUVRow_C() is implemented using SplitUVRow_C().
- Changes to unit_test/planar_test.cc.

Bug: libyuv:882
Change-Id: I0a8e8e5fb43bdf58ded87244e802343eacb789f2
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3795063
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2022-08-01 12:49:02 -07:00 committed by Frank Barchard
parent fe8c78b61a
commit b028453ba6
9 changed files with 266 additions and 273 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1833 Version: 1834
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -76,18 +76,19 @@ extern "C" {
// TODO(fbarchard): Port to Visual C // TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEUVROWDOWN2BOX_SSSE3 #define HAS_SCALEUVROWDOWN2BOX_SSSE3
#define HAS_SCALEROWUP2LINEAR_SSE2 #define HAS_SCALEROWUP2_LINEAR_SSE2
#define HAS_SCALEROWUP2LINEAR_SSSE3 #define HAS_SCALEROWUP2_LINEAR_SSSE3
#define HAS_SCALEROWUP2BILINEAR_SSE2 #define HAS_SCALEROWUP2_BILINEAR_SSE2
#define HAS_SCALEROWUP2BILINEAR_SSSE3 #define HAS_SCALEROWUP2_BILINEAR_SSSE3
#define HAS_SCALEROWUP2LINEAR_12_SSSE3 #define HAS_SCALEROWUP2_LINEAR_12_SSSE3
#define HAS_SCALEROWUP2BILINEAR_12_SSSE3 #define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2LINEAR_16_SSE2 #define HAS_SCALEROWUP2_LINEAR_16_SSE2
#define HAS_SCALEROWUP2BILINEAR_16_SSE2 // TODO(libyuv:882): Fix SSE2 version of BILINEAR
#define HAS_SCALEUVROWUP2LINEAR_SSSE3 //#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2BILINEAR_SSSE3 #define HAS_SCALEUVROWUP2_LINEAR_SSSE3
#define HAS_SCALEUVROWUP2LINEAR_16_SSE41 #define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2BILINEAR_16_SSE41 #define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
#endif #endif
// The following are available for gcc/clang x86 platforms, but // The following are available for gcc/clang x86 platforms, but
@ -97,16 +98,16 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__)) && \ (defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEUVROWDOWN2BOX_AVX2 #define HAS_SCALEUVROWDOWN2BOX_AVX2
#define HAS_SCALEROWUP2LINEAR_AVX2 #define HAS_SCALEROWUP2_LINEAR_AVX2
#define HAS_SCALEROWUP2BILINEAR_AVX2 #define HAS_SCALEROWUP2_BILINEAR_AVX2
#define HAS_SCALEROWUP2LINEAR_12_AVX2 #define HAS_SCALEROWUP2_LINEAR_12_AVX2
#define HAS_SCALEROWUP2BILINEAR_12_AVX2 #define HAS_SCALEROWUP2_BILINEAR_12_AVX2
#define HAS_SCALEROWUP2LINEAR_16_AVX2 #define HAS_SCALEROWUP2_LINEAR_16_AVX2
#define HAS_SCALEROWUP2BILINEAR_16_AVX2 #define HAS_SCALEROWUP2_BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2LINEAR_AVX2 #define HAS_SCALEUVROWUP2_LINEAR_AVX2
#define HAS_SCALEUVROWUP2BILINEAR_AVX2 #define HAS_SCALEUVROWUP2_BILINEAR_AVX2
#define HAS_SCALEUVROWUP2LINEAR_16_AVX2 #define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2BILINEAR_16_AVX2 #define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
#endif #endif
// The following are available on all x86 platforms, but // The following are available on all x86 platforms, but
@ -135,16 +136,16 @@ extern "C" {
#define HAS_SCALEROWDOWN4_NEON #define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON #define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON #define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2LINEAR_NEON #define HAS_SCALEROWUP2_LINEAR_NEON
#define HAS_SCALEROWUP2BILINEAR_NEON #define HAS_SCALEROWUP2_BILINEAR_NEON
#define HAS_SCALEROWUP2LINEAR_12_NEON #define HAS_SCALEROWUP2_LINEAR_12_NEON
#define HAS_SCALEROWUP2BILINEAR_12_NEON #define HAS_SCALEROWUP2_BILINEAR_12_NEON
#define HAS_SCALEROWUP2LINEAR_16_NEON #define HAS_SCALEROWUP2_LINEAR_16_NEON
#define HAS_SCALEROWUP2BILINEAR_16_NEON #define HAS_SCALEROWUP2_BILINEAR_16_NEON
#define HAS_SCALEUVROWUP2LINEAR_NEON #define HAS_SCALEUVROWUP2_LINEAR_NEON
#define HAS_SCALEUVROWUP2BILINEAR_NEON #define HAS_SCALEUVROWUP2_BILINEAR_NEON
#define HAS_SCALEUVROWUP2LINEAR_16_NEON #define HAS_SCALEUVROWUP2_LINEAR_16_NEON
#define HAS_SCALEUVROWUP2BILINEAR_16_NEON #define HAS_SCALEUVROWUP2_BILINEAR_16_NEON
#endif #endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1833 #define LIBYUV_VERSION 1834
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -5454,25 +5454,25 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSE2) #if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
} }
@ -5582,22 +5582,22 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSE2) #if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
} }
@ -5668,19 +5668,19 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
} }
@ -5768,17 +5768,17 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
} }
@ -5848,19 +5848,19 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
} }
@ -5947,17 +5947,17 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
} }
@ -6091,25 +6091,25 @@ static int I420AlphaToARGBMatrixBilinear(
} }
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSE2) #if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
} }
@ -6277,22 +6277,22 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
} }
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSE2) #if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
} }
@ -6408,19 +6408,19 @@ static int I010AlphaToARGBMatrixBilinear(
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
} }
@ -6565,17 +6565,17 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3) #if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2) #if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
} }
#endif #endif
#if defined(HAS_SCALEROWUP2LINEAR_12_NEON) #if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
} }
@ -6647,19 +6647,19 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) { if (TestCpuFlag(kCpuHasSSE41)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
} }
@ -6737,19 +6737,19 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) { if (TestCpuFlag(kCpuHasSSE41)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
} }
@ -6813,19 +6813,19 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) { if (TestCpuFlag(kCpuHasSSE41)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41; Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2; Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON; Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
} }
@ -6903,19 +6903,19 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) { if (TestCpuFlag(kCpuHasSSE41)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41; ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2; ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON; ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
} }

View File

@ -2663,45 +2663,6 @@ void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
} }
} }
// Copy a row of pixels out of a tiled buffer into a linear destination.
// Each 16-byte group is read from the start of a new tile; the source
// pointer advances by src_tile_stride between groups. A final partial
// group of (width & 15) bytes is copied from the last tile.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  int remaining = width;
  while (remaining >= 16) {
    memcpy(dst, src, 16);
    dst += 16;
    src += src_tile_stride;
    remaining -= 16;
  }
  if (remaining > 0) {
    memcpy(dst, src, remaining);
  }
}
// Deinterleave a tiled UV row into separate U and V planes.
// Each 16-byte tile row holds 8 interleaved UV pairs; between tiles the
// source pointer advances by src_tile_stride. A trailing partial tile of
// (width & 0xF) bytes yields (width & 0xF) / 2 extra pairs.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int full_tiles = width / 16;
  for (int t = 0; t < full_tiles; ++t) {
    for (int i = 0; i < 8; ++i) {
      dst_u[i] = src_uv[2 * i];
      dst_v[i] = src_uv[2 * i + 1];
    }
    dst_u += 8;
    dst_v += 8;
    src_uv += src_tile_stride;  // net advance per tile: 16 + (stride - 16)
  }
  int leftover_pairs = (width & 0xF) / 2;
  for (int i = 0; i < leftover_pairs; ++i) {
    dst_u[i] = src_uv[2 * i];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
void SplitUVRow_C(const uint8_t* src_uv, void SplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
@ -2738,6 +2699,38 @@ void MergeUVRow_C(const uint8_t* src_u,
} }
} }
// Gather one linear row from a tiled source: 16 bytes per tile, stepping
// the source by src_tile_stride between tiles, then copy the trailing
// (width & 15) bytes from the final tile.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  int x;
  for (x = 0; x + 16 <= width; x += 16) {
    memcpy(dst + x, src, 16);
    src += src_tile_stride;
  }
  if (width & 15) {
    memcpy(dst + x, src, width & 15);
  }
}
// Deinterleave a tiled UV row into separate U and V planes, delegating the
// per-tile split to SplitUVRow_C (8 UV pairs per 16-byte tile row).
// The remainder call handles the last (width & 15) interleaved bytes;
// NOTE(review): assumes SplitUVRow_C's width argument counts UV pairs —
// matches the (pairs = (bytes + 1) / 2) rounding used here; verify at caller.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int remaining = width;
  while (remaining >= 16) {
    SplitUVRow_C(src_uv, dst_u, dst_v, 8);
    dst_u += 8;
    dst_v += 8;
    src_uv += src_tile_stride;
    remaining -= 16;
  }
  if (remaining > 0) {
    SplitUVRow_C(src_uv, dst_u, dst_v, (remaining + 1) / 2);
  }
}
void SplitRGBRow_C(const uint8_t* src_rgb, void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r, uint8_t* dst_r,
uint8_t* dst_g, uint8_t* dst_g,

View File

@ -1384,25 +1384,25 @@ void ScalePlaneUp2_Linear(int src_width,
// This function can only scale up by 2 times horizontally. // This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2)); assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_SSE2 #ifdef HAS_SCALEROWUP2_LINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2; ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3 #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_NEON #ifdef HAS_SCALEROWUP2_LINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
} }
@ -1443,25 +1443,25 @@ void ScalePlaneUp2_Bilinear(int src_width,
assert(src_width == ((dst_width + 1) / 2)); assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2)); assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2 #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2; Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3 #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_NEON #ifdef HAS_SCALEROWUP2_BILINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
} }
@ -1503,19 +1503,19 @@ void ScalePlaneUp2_12_Linear(int src_width,
// This function can only scale up by 2 times horizontally. // This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2)); assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3 #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3; ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
} }
@ -1557,19 +1557,19 @@ void ScalePlaneUp2_12_Bilinear(int src_width,
assert(src_width == ((dst_width + 1) / 2)); assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2)); assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3 #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
} }
@ -1604,19 +1604,19 @@ void ScalePlaneUp2_16_Linear(int src_width,
// This function can only scale up by 2 times horizontally. // This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2)); assert(src_width == ((dst_width + 1) / 2));
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2 #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2; ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2; ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON; ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
} }
@ -1653,19 +1653,19 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
assert(src_width == ((dst_width + 1) / 2)); assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2)); assert(src_height == ((dst_height + 1) / 2));
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2 #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3; Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2; Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON; Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
} }
@ -1758,7 +1758,6 @@ void ScalePlaneBilinearUp_16(int src_width,
} }
#endif #endif
} }
if (y > max_y) { if (y > max_y) {
y = max_y; y = max_y;
} }
@ -1915,7 +1914,6 @@ void ScalePlane(const uint8_t* src,
src = src + (src_height - 1) * (int64_t)src_stride; src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride; src_stride = -src_stride;
} }
// Use specialized scales to improve performance for common resolutions. // Use specialized scales to improve performance for common resolutions.
// For example, all the 1/2 scalings will use ScalePlaneDown2() // For example, all the 1/2 scalings will use ScalePlaneDown2()
if (dst_width == src_width && dst_height == src_height) { if (dst_width == src_width && dst_height == src_height) {
@ -2018,7 +2016,6 @@ void ScalePlane_16(const uint16_t* src,
src = src + (src_height - 1) * (int64_t)src_stride; src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride; src_stride = -src_stride;
} }
// Use specialized scales to improve performance for common resolutions. // Use specialized scales to improve performance for common resolutions.
// For example, all the 1/2 scalings will use ScalePlaneDown2() // For example, all the 1/2 scalings will use ScalePlaneDown2()
if (dst_width == src_width && dst_height == src_height) { if (dst_width == src_width && dst_height == src_height) {

View File

@ -676,7 +676,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_C,
0, 0,
uint16_t) uint16_t)
#ifdef HAS_SCALEROWUP2LINEAR_SSE2 #ifdef HAS_SCALEROWUP2_LINEAR_SSE2
SUH2LANY(ScaleRowUp2_Linear_Any_SSE2, SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
ScaleRowUp2_Linear_SSE2, ScaleRowUp2_Linear_SSE2,
ScaleRowUp2_Linear_C, ScaleRowUp2_Linear_C,
@ -684,7 +684,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3 #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3, SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
ScaleRowUp2_Linear_SSSE3, ScaleRowUp2_Linear_SSSE3,
ScaleRowUp2_Linear_C, ScaleRowUp2_Linear_C,
@ -692,7 +692,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3 #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3, SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
ScaleRowUp2_Linear_12_SSSE3, ScaleRowUp2_Linear_12_SSSE3,
ScaleRowUp2_Linear_16_C, ScaleRowUp2_Linear_16_C,
@ -700,7 +700,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2 #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2, SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
ScaleRowUp2_Linear_16_SSE2, ScaleRowUp2_Linear_16_SSE2,
ScaleRowUp2_Linear_16_C, ScaleRowUp2_Linear_16_C,
@ -708,7 +708,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_AVX2
SUH2LANY(ScaleRowUp2_Linear_Any_AVX2, SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
ScaleRowUp2_Linear_AVX2, ScaleRowUp2_Linear_AVX2,
ScaleRowUp2_Linear_C, ScaleRowUp2_Linear_C,
@ -716,7 +716,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2, SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
ScaleRowUp2_Linear_12_AVX2, ScaleRowUp2_Linear_12_AVX2,
ScaleRowUp2_Linear_16_C, ScaleRowUp2_Linear_16_C,
@ -724,7 +724,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2, SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
ScaleRowUp2_Linear_16_AVX2, ScaleRowUp2_Linear_16_AVX2,
ScaleRowUp2_Linear_16_C, ScaleRowUp2_Linear_16_C,
@ -732,7 +732,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_NEON #ifdef HAS_SCALEROWUP2_LINEAR_NEON
SUH2LANY(ScaleRowUp2_Linear_Any_NEON, SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
ScaleRowUp2_Linear_NEON, ScaleRowUp2_Linear_NEON,
ScaleRowUp2_Linear_C, ScaleRowUp2_Linear_C,
@ -740,7 +740,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_NEON #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON, SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
ScaleRowUp2_Linear_12_NEON, ScaleRowUp2_Linear_12_NEON,
ScaleRowUp2_Linear_16_C, ScaleRowUp2_Linear_16_C,
@ -748,7 +748,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_NEON #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON, SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON,
ScaleRowUp2_Linear_16_NEON, ScaleRowUp2_Linear_16_NEON,
ScaleRowUp2_Linear_16_C, ScaleRowUp2_Linear_16_C,
@ -796,7 +796,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C,
0, 0,
uint16_t) uint16_t)
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2 #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2, SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
ScaleRowUp2_Bilinear_SSE2, ScaleRowUp2_Bilinear_SSE2,
ScaleRowUp2_Bilinear_C, ScaleRowUp2_Bilinear_C,
@ -804,7 +804,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3 #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3, SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
ScaleRowUp2_Bilinear_12_SSSE3, ScaleRowUp2_Bilinear_12_SSSE3,
ScaleRowUp2_Bilinear_16_C, ScaleRowUp2_Bilinear_16_C,
@ -812,7 +812,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2 #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3, SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
ScaleRowUp2_Bilinear_16_SSE2, ScaleRowUp2_Bilinear_16_SSE2,
ScaleRowUp2_Bilinear_16_C, ScaleRowUp2_Bilinear_16_C,
@ -820,7 +820,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3 #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3, SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
ScaleRowUp2_Bilinear_SSSE3, ScaleRowUp2_Bilinear_SSSE3,
ScaleRowUp2_Bilinear_C, ScaleRowUp2_Bilinear_C,
@ -828,7 +828,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2, SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
ScaleRowUp2_Bilinear_AVX2, ScaleRowUp2_Bilinear_AVX2,
ScaleRowUp2_Bilinear_C, ScaleRowUp2_Bilinear_C,
@ -836,7 +836,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2, SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
ScaleRowUp2_Bilinear_12_AVX2, ScaleRowUp2_Bilinear_12_AVX2,
ScaleRowUp2_Bilinear_16_C, ScaleRowUp2_Bilinear_16_C,
@ -844,7 +844,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2, SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
ScaleRowUp2_Bilinear_16_AVX2, ScaleRowUp2_Bilinear_16_AVX2,
ScaleRowUp2_Bilinear_16_C, ScaleRowUp2_Bilinear_16_C,
@ -852,7 +852,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_NEON #ifdef HAS_SCALEROWUP2_BILINEAR_NEON
SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON, SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
ScaleRowUp2_Bilinear_NEON, ScaleRowUp2_Bilinear_NEON,
ScaleRowUp2_Bilinear_C, ScaleRowUp2_Bilinear_C,
@ -860,7 +860,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON, SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
ScaleRowUp2_Bilinear_12_NEON, ScaleRowUp2_Bilinear_12_NEON,
ScaleRowUp2_Bilinear_16_C, ScaleRowUp2_Bilinear_16_C,
@ -868,7 +868,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON, SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON,
ScaleRowUp2_Bilinear_16_NEON, ScaleRowUp2_Bilinear_16_NEON,
ScaleRowUp2_Bilinear_16_C, ScaleRowUp2_Bilinear_16_C,
@ -908,7 +908,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C,
0, 0,
uint16_t) uint16_t)
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3 #ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3, SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
ScaleUVRowUp2_Linear_SSSE3, ScaleUVRowUp2_Linear_SSSE3,
ScaleUVRowUp2_Linear_C, ScaleUVRowUp2_Linear_C,
@ -916,7 +916,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2 #ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2, SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
ScaleUVRowUp2_Linear_AVX2, ScaleUVRowUp2_Linear_AVX2,
ScaleUVRowUp2_Linear_C, ScaleUVRowUp2_Linear_C,
@ -924,7 +924,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41, SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
ScaleUVRowUp2_Linear_16_SSE41, ScaleUVRowUp2_Linear_16_SSE41,
ScaleUVRowUp2_Linear_16_C, ScaleUVRowUp2_Linear_16_C,
@ -932,7 +932,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2, SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
ScaleUVRowUp2_Linear_16_AVX2, ScaleUVRowUp2_Linear_16_AVX2,
ScaleUVRowUp2_Linear_16_C, ScaleUVRowUp2_Linear_16_C,
@ -940,7 +940,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_NEON #ifdef HAS_SCALEUVROWUP2_LINEAR_NEON
SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON, SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
ScaleUVRowUp2_Linear_NEON, ScaleUVRowUp2_Linear_NEON,
ScaleUVRowUp2_Linear_C, ScaleUVRowUp2_Linear_C,
@ -948,7 +948,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON, SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON,
ScaleUVRowUp2_Linear_16_NEON, ScaleUVRowUp2_Linear_16_NEON,
ScaleUVRowUp2_Linear_16_C, ScaleUVRowUp2_Linear_16_C,
@ -1006,7 +1006,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C,
0, 0,
uint16_t) uint16_t)
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3 #ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3, SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
ScaleUVRowUp2_Bilinear_SSSE3, ScaleUVRowUp2_Bilinear_SSSE3,
ScaleUVRowUp2_Bilinear_C, ScaleUVRowUp2_Bilinear_C,
@ -1014,7 +1014,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2 #ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2, SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
ScaleUVRowUp2_Bilinear_AVX2, ScaleUVRowUp2_Bilinear_AVX2,
ScaleUVRowUp2_Bilinear_C, ScaleUVRowUp2_Bilinear_C,
@ -1022,7 +1022,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41, SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
ScaleUVRowUp2_Bilinear_16_SSE41, ScaleUVRowUp2_Bilinear_16_SSE41,
ScaleUVRowUp2_Bilinear_16_C, ScaleUVRowUp2_Bilinear_16_C,
@ -1030,7 +1030,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2, SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
ScaleUVRowUp2_Bilinear_16_AVX2, ScaleUVRowUp2_Bilinear_16_AVX2,
ScaleUVRowUp2_Bilinear_16_C, ScaleUVRowUp2_Bilinear_16_C,
@ -1038,7 +1038,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
uint16_t) uint16_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON #ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON
SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON, SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
ScaleUVRowUp2_Bilinear_NEON, ScaleUVRowUp2_Bilinear_NEON,
ScaleUVRowUp2_Bilinear_C, ScaleUVRowUp2_Bilinear_C,
@ -1046,7 +1046,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
uint8_t) uint8_t)
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON, SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON,
ScaleUVRowUp2_Bilinear_16_NEON, ScaleUVRowUp2_Bilinear_16_NEON,
ScaleUVRowUp2_Bilinear_16_C, ScaleUVRowUp2_Bilinear_16_C,

View File

@ -779,7 +779,7 @@ static const uvec8 kLinearShuffleFar = {2, 3, 0, 1, 6, 7, 4, 5,
static const uvec8 kLinearMadd31 = {3, 1, 1, 3, 3, 1, 1, 3, static const uvec8 kLinearMadd31 = {3, 1, 1, 3, 3, 1, 1, 3,
3, 1, 1, 3, 3, 1, 1, 3}; 3, 1, 1, 3, 3, 1, 1, 3};
#ifdef HAS_SCALEROWUP2LINEAR_SSE2 #ifdef HAS_SCALEROWUP2_LINEAR_SSE2
void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr, void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width) { int dst_width) {
@ -833,7 +833,7 @@ void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSE2 #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr, void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -949,7 +949,7 @@ void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3 #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr, void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width) { int dst_width) {
@ -999,7 +999,7 @@ void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3 #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr, void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,
@ -1098,7 +1098,7 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2 #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr, void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width) { int dst_width) {
@ -1149,7 +1149,7 @@ void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2 #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr, void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,
@ -1254,11 +1254,12 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
"+r"(dst_width) // %2 "+r"(dst_width) // %2
: "r"((intptr_t)(src_stride)), // %3 : "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)) // %4 "r"((intptr_t)(dst_stride)) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_SSSE3 #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr, void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width) { int dst_width) {
@ -1297,7 +1298,7 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3 #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr, void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -1384,7 +1385,7 @@ void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_AVX2
void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr, void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width) { int dst_width) {
@ -1426,7 +1427,7 @@ void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr, void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -1510,7 +1511,7 @@ void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr, void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width) { int dst_width) {
@ -1560,7 +1561,7 @@ void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr, void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,
@ -1624,7 +1625,7 @@ void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr, void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width) { int dst_width) {
@ -1672,7 +1673,7 @@ void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr, void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,
@ -2325,13 +2326,18 @@ int FixedDiv1_X86(int num, int div) {
return num; return num;
} }
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3 #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) || \
defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
// Shuffle table for splitting UV into upper and lower part of register. // Shuffle table for splitting UV into upper and lower part of register.
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u, static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u}; 1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u, static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u,
6u, 14u, 0x80, 0x80, 0x80, 0x80, 6u, 14u, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80}; 0x80, 0x80, 0x80, 0x80};
#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr, void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
@ -2417,7 +2423,7 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
static const uvec8 kUVLinearMadd31 = {3, 1, 3, 1, 1, 3, 1, 3, static const uvec8 kUVLinearMadd31 = {3, 1, 3, 1, 1, 3, 1, 3,
3, 1, 3, 1, 1, 3, 1, 3}; 3, 1, 3, 1, 1, 3, 1, 3};
#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3 #ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr, void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width) { int dst_width) {
@ -2456,7 +2462,7 @@ void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3 #ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr, void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -2541,7 +2547,7 @@ void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2 #ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr, void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -2583,7 +2589,7 @@ void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2 #ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr, void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
@ -2665,7 +2671,7 @@ void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr, void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width) { int dst_width) {
@ -2715,7 +2721,7 @@ void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr, void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,
@ -2808,7 +2814,7 @@ void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr, void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width) { int dst_width) {
@ -2855,7 +2861,7 @@ void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
} }
#endif #endif
#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr, void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint16_t* dst_ptr, uint16_t* dst_ptr,

View File

@ -1673,97 +1673,45 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
free_aligned_buffer_page_end(dst_opt); free_aligned_buffer_page_end(dst_opt);
} }
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { // Compares DetileSplitUV to 2 step Detile + SplitUV
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
int i, j; int i, j;
// orig is tiled. Allocate enough memory for tiles. // orig is tiled. Allocate enough memory for tiles.
int orig_width = (benchmark_width_ + 15) & ~15; int orig_width = (benchmark_width_ + 15) & ~15;
int orig_height = (benchmark_height_ + 15) & ~15; int orig_height = (benchmark_height_ + 15) & ~15;
int orig_plane_size = orig_width * orig_height; int orig_plane_size = orig_width * orig_height;
int u_plane_size = benchmark_width_ * benchmark_height_; int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
int v_plane_size = u_plane_size;
align_buffer_page_end(orig_uv, orig_plane_size);
align_buffer_page_end(dst_u_c, u_plane_size);
align_buffer_page_end(dst_u_opt, u_plane_size);
align_buffer_page_end(dst_v_c, v_plane_size);
align_buffer_page_end(dst_v_opt, v_plane_size);
MemRandomize(orig_uv, orig_plane_size);
memset(dst_u_c, 0, u_plane_size);
memset(dst_u_opt, 0, u_plane_size);
memset(dst_v_c, 0, v_plane_size);
memset(dst_v_opt, 0, v_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
for (i = 0; i < u_plane_size; ++i) {
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
}
for (i = 0; i < v_plane_size; ++i) {
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
free_aligned_buffer_page_end(orig_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);
free_aligned_buffer_page_end(dst_v_opt);
}
// TODO(b/228518489): Fix Segmentation fault in this test
TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
int orig_width = (benchmark_width_ + 15) & ~15;
int orig_height = (benchmark_height_ + 15) & ~15;
int orig_plane_size = orig_width * orig_height;
int u_plane_size = benchmark_width_ * benchmark_height_;
int v_plane_size = u_plane_size;
align_buffer_page_end(orig_uv, orig_plane_size); align_buffer_page_end(orig_uv, orig_plane_size);
align_buffer_page_end(detiled_uv, orig_plane_size); align_buffer_page_end(detiled_uv, orig_plane_size);
align_buffer_page_end(dst_u_two_stage, u_plane_size); align_buffer_page_end(dst_u_two_stage, uv_plane_size);
align_buffer_page_end(dst_u_opt, u_plane_size); align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_two_stage, v_plane_size); align_buffer_page_end(dst_v_two_stage, uv_plane_size);
align_buffer_page_end(dst_v_opt, v_plane_size); align_buffer_page_end(dst_v_opt, uv_plane_size);
MemRandomize(orig_uv, orig_plane_size); MemRandomize(orig_uv, orig_plane_size);
memset(detiled_uv, 0, orig_plane_size); memset(detiled_uv, 0, orig_plane_size);
memset(dst_u_two_stage, 0, u_plane_size); memset(dst_u_two_stage, 0, uv_plane_size);
memset(dst_u_opt, 0, u_plane_size); memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_two_stage, 0, v_plane_size); memset(dst_v_two_stage, 0, uv_plane_size);
memset(dst_v_opt, 0, v_plane_size); memset(dst_v_opt, 0, uv_plane_size);
DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Benchmark 2 step conversion for comparison.
for (j = 0; j < benchmark_iterations_; j++) { for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane( DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, benchmark_width_, benchmark_height_, 16);
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
benchmark_height_);
} }
DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, for (i = 0; i < uv_plane_size; ++i) {
benchmark_width_, benchmark_height_, 16);
SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_);
for (i = 0; i < u_plane_size; ++i) {
EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]); EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
}
for (i = 0; i < v_plane_size; ++i) {
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
} }
@ -1775,6 +1723,54 @@ TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) {
free_aligned_buffer_page_end(dst_v_opt); free_aligned_buffer_page_end(dst_v_opt);
} }
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
int orig_width = (benchmark_width_ + 15) & ~15;
int orig_height = (benchmark_height_ + 15) & ~15;
int orig_plane_size = orig_width * orig_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
align_buffer_page_end(orig_uv, orig_plane_size);
align_buffer_page_end(dst_u_c, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_c, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
MemRandomize(orig_uv, orig_plane_size);
memset(dst_u_c, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_c, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
for (i = 0; i < uv_plane_size; ++i) {
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
free_aligned_buffer_page_end(orig_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);
free_aligned_buffer_page_end(dst_v_opt);
}
static int TestMultiply(int width, static int TestMultiply(int width,
int height, int height,
int benchmark_iterations, int benchmark_iterations,