diff --git a/README.chromium b/README.chromium
index 8fca6a3d3..d60ff00bd 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1833
+Version: 1834
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 5c474b0ce..a96078b34 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -76,18 +76,19 @@ extern "C" {
 // TODO(fbarchard): Port to Visual C
 #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
 #define HAS_SCALEUVROWDOWN2BOX_SSSE3
-#define HAS_SCALEROWUP2LINEAR_SSE2
-#define HAS_SCALEROWUP2LINEAR_SSSE3
-#define HAS_SCALEROWUP2BILINEAR_SSE2
-#define HAS_SCALEROWUP2BILINEAR_SSSE3
-#define HAS_SCALEROWUP2LINEAR_12_SSSE3
-#define HAS_SCALEROWUP2BILINEAR_12_SSSE3
-#define HAS_SCALEROWUP2LINEAR_16_SSE2
-#define HAS_SCALEROWUP2BILINEAR_16_SSE2
-#define HAS_SCALEUVROWUP2LINEAR_SSSE3
-#define HAS_SCALEUVROWUP2BILINEAR_SSSE3
-#define HAS_SCALEUVROWUP2LINEAR_16_SSE41
-#define HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#define HAS_SCALEROWUP2_LINEAR_SSE2
+#define HAS_SCALEROWUP2_LINEAR_SSSE3
+#define HAS_SCALEROWUP2_BILINEAR_SSE2
+#define HAS_SCALEROWUP2_BILINEAR_SSSE3
+#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
+#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
+#define HAS_SCALEROWUP2_LINEAR_16_SSE2
+// TODO(libyuv:882): Fix SSE2 version of BILINEAR
+//#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
+#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
+#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
+#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
+#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
 #endif
 
 // The following are available for gcc/clang x86 platforms, but
@@ -97,16 +98,16 @@ extern "C" {
     (defined(__x86_64__) || defined(__i386__)) && \
     (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
 #define HAS_SCALEUVROWDOWN2BOX_AVX2
-#define HAS_SCALEROWUP2LINEAR_AVX2
-#define HAS_SCALEROWUP2BILINEAR_AVX2
-#define HAS_SCALEROWUP2LINEAR_12_AVX2
-#define HAS_SCALEROWUP2BILINEAR_12_AVX2
-#define HAS_SCALEROWUP2LINEAR_16_AVX2
-#define HAS_SCALEROWUP2BILINEAR_16_AVX2
-#define HAS_SCALEUVROWUP2LINEAR_AVX2
-#define HAS_SCALEUVROWUP2BILINEAR_AVX2
-#define HAS_SCALEUVROWUP2LINEAR_16_AVX2
-#define HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#define HAS_SCALEROWUP2_LINEAR_AVX2
+#define HAS_SCALEROWUP2_BILINEAR_AVX2
+#define HAS_SCALEROWUP2_LINEAR_12_AVX2
+#define HAS_SCALEROWUP2_BILINEAR_12_AVX2
+#define HAS_SCALEROWUP2_LINEAR_16_AVX2
+#define HAS_SCALEROWUP2_BILINEAR_16_AVX2
+#define HAS_SCALEUVROWUP2_LINEAR_AVX2
+#define HAS_SCALEUVROWUP2_BILINEAR_AVX2
+#define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
+#define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
 #endif
 
 // The following are available on all x86 platforms, but
@@ -135,16 +136,16 @@ extern "C" {
 #define HAS_SCALEROWDOWN4_NEON
 #define HAS_SCALEUVROWDOWN2BOX_NEON
 #define HAS_SCALEUVROWDOWNEVEN_NEON
-#define HAS_SCALEROWUP2LINEAR_NEON
-#define HAS_SCALEROWUP2BILINEAR_NEON
-#define HAS_SCALEROWUP2LINEAR_12_NEON
-#define HAS_SCALEROWUP2BILINEAR_12_NEON
-#define HAS_SCALEROWUP2LINEAR_16_NEON
-#define HAS_SCALEROWUP2BILINEAR_16_NEON
-#define HAS_SCALEUVROWUP2LINEAR_NEON
-#define HAS_SCALEUVROWUP2BILINEAR_NEON
-#define HAS_SCALEUVROWUP2LINEAR_16_NEON
-#define HAS_SCALEUVROWUP2BILINEAR_16_NEON
+#define HAS_SCALEROWUP2_LINEAR_NEON
+#define HAS_SCALEROWUP2_BILINEAR_NEON
+#define HAS_SCALEROWUP2_LINEAR_12_NEON
+#define HAS_SCALEROWUP2_BILINEAR_12_NEON
+#define HAS_SCALEROWUP2_LINEAR_16_NEON
+#define HAS_SCALEROWUP2_BILINEAR_16_NEON
+#define HAS_SCALEUVROWUP2_LINEAR_NEON
+#define HAS_SCALEUVROWUP2_BILINEAR_NEON
+#define HAS_SCALEUVROWUP2_LINEAR_16_NEON
+#define HAS_SCALEUVROWUP2_BILINEAR_16_NEON
 #endif
 
 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 7cc55698a..eda5078bc 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1833
+#define LIBYUV_VERSION 1834
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 942df30a0..71ef8c108 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -5454,25 +5454,25 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
   }
@@ -5582,22 +5582,22 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
     }
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
   }
@@ -5668,19 +5668,19 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
   }
@@ -5768,17 +5768,17 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
   }
@@ -5848,19 +5848,19 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
   }
@@ -5947,17 +5947,17 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
   }
@@ -6091,25 +6091,25 @@ static int I420AlphaToARGBMatrixBilinear(
     }
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
   }
@@ -6277,22 +6277,22 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
     }
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_SSE2)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
   }
@@ -6408,19 +6408,19 @@ static int I010AlphaToARGBMatrixBilinear(
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
   }
@@ -6565,17 +6565,17 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
   }
 #endif
 
-#if defined(HAS_SCALEROWUP2LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
   }
 #endif
-#if defined(HAS_SCALEROWUP2LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
   }
@@ -6647,19 +6647,19 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
   if (TestCpuFlag(kCpuHasSSE41)) {
     Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
   }
@@ -6737,19 +6737,19 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
   if (TestCpuFlag(kCpuHasSSE41)) {
     ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
   }
@@ -6813,19 +6813,19 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
   if (TestCpuFlag(kCpuHasSSE41)) {
     Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
   }
@@ -6903,19 +6903,19 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
   if (TestCpuFlag(kCpuHasSSE41)) {
     ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
   }
diff --git a/source/row_common.cc b/source/row_common.cc
index 150f48db4..f4e987621 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2663,45 +2663,6 @@ void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
   }
 }
 
-void DetileRow_C(const uint8_t* src,
-                 ptrdiff_t src_tile_stride,
-                 uint8_t* dst,
-                 int width) {
-  int x;
-  for (x = 0; x < width - 15; x += 16) {
-    memcpy(dst, src, 16);
-    dst += 16;
-    src += src_tile_stride;
-  }
-  if (width & 15) {
-    memcpy(dst, src, width & 15);
-  }
-}
-
-void DetileSplitUVRow_C(const uint8_t* src_uv,
-                        ptrdiff_t src_tile_stride,
-                        uint8_t* dst_u,
-                        uint8_t* dst_v,
-                        int width) {
-  int tile;
-  for (tile = 0; tile < width / 16; tile++) {
-    for (int x = 0; x < 8; x++) {
-      *dst_u++ = src_uv[0];
-      *dst_v++ = src_uv[1];
-      src_uv += 2;
-    }
-    src_uv += src_tile_stride - 16;
-  }
-  for (int x = 0; x < (width & 0xF) / 2; ++x) {
-    *dst_u = *src_uv;
-    dst_u++;
-    src_uv++;
-    *dst_v = *src_uv;
-    dst_v++;
-    src_uv++;
-  }
-}
-
 void SplitUVRow_C(const uint8_t* src_uv,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
@@ -2738,6 +2699,38 @@ void MergeUVRow_C(const uint8_t* src_u,
   }
 }
 
+void DetileRow_C(const uint8_t* src,
+                 ptrdiff_t src_tile_stride,
+                 uint8_t* dst,
+                 int width) {
+  int x;
+  for (x = 0; x < width - 15; x += 16) {
+    memcpy(dst, src, 16);
+    dst += 16;
+    src += src_tile_stride;
+  }
+  if (width & 15) {
+    memcpy(dst, src, width & 15);
+  }
+}
+
+void DetileSplitUVRow_C(const uint8_t* src_uv,
+                        ptrdiff_t src_tile_stride,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width) {
+  int x;
+  for (x = 0; x < width - 15; x += 16) {
+    SplitUVRow_C(src_uv, dst_u, dst_v, 8);
+    dst_u += 8;
+    dst_v += 8;
+    src_uv += src_tile_stride;
+  }
+  if (width & 15) {
+    SplitUVRow_C(src_uv, dst_u, dst_v, ((width & 15) + 1) / 2);
+  }
+}
+
 void SplitRGBRow_C(const uint8_t* src_rgb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
diff --git a/source/scale.cc b/source/scale.cc
index 8147c291c..d673536bb 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -1384,25 +1384,25 @@ void ScalePlaneUp2_Linear(int src_width,
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
-#ifdef HAS_SCALEROWUP2LINEAR_SSE2
+#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_NEON
+#ifdef HAS_SCALEROWUP2_LINEAR_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
   }
@@ -1443,25 +1443,25 @@ void ScalePlaneUp2_Bilinear(int src_width,
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
 
-#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
+#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
   if (TestCpuFlag(kCpuHasSSE2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_NEON
+#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
   }
@@ -1503,19 +1503,19 @@ void ScalePlaneUp2_12_Linear(int src_width,
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
+#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
+#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
   }
@@ -1557,19 +1557,19 @@ void ScalePlaneUp2_12_Bilinear(int src_width,
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
   }
@@ -1604,19 +1604,19 @@ void ScalePlaneUp2_16_Linear(int src_width,
   // This function can only scale up by 2 times horizontally.
   assert(src_width == ((dst_width + 1) / 2));
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
+#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
   }
@@ -1653,19 +1653,19 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
   assert(src_width == ((dst_width + 1) / 2));
   assert(src_height == ((dst_height + 1) / 2));
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
   if (TestCpuFlag(kCpuHasSSSE3)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSSE3;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
   if (TestCpuFlag(kCpuHasAVX2)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
   }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
   if (TestCpuFlag(kCpuHasNEON)) {
     Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
   }
@@ -1758,7 +1758,6 @@ void ScalePlaneBilinearUp_16(int src_width,
     }
 #endif
   }
-
   if (y > max_y) {
     y = max_y;
   }
@@ -1915,7 +1914,6 @@ void ScalePlane(const uint8_t* src,
     src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
-
   // Use specialized scales to improve performance for common resolutions.
   // For example, all the 1/2 scalings will use ScalePlaneDown2()
   if (dst_width == src_width && dst_height == src_height) {
@@ -2018,7 +2016,6 @@ void ScalePlane_16(const uint16_t* src,
     src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
-
   // Use specialized scales to improve performance for common resolutions.
   // For example, all the 1/2 scalings will use ScalePlaneDown2()
   if (dst_width == src_width && dst_height == src_height) {
diff --git a/source/scale_any.cc b/source/scale_any.cc
index e820584b0..ae47cb80c 100644
--- a/source/scale_any.cc
+++ b/source/scale_any.cc
@@ -676,7 +676,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_C,
          0,
          uint16_t)
 
-#ifdef HAS_SCALEROWUP2LINEAR_SSE2
+#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
 SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
          ScaleRowUp2_Linear_SSE2,
          ScaleRowUp2_Linear_C,
@@ -684,7 +684,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSE2,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
 SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
          ScaleRowUp2_Linear_SSSE3,
         ScaleRowUp2_Linear_C,
@@ -692,7 +692,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
+#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
 SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
          ScaleRowUp2_Linear_12_SSSE3,
          ScaleRowUp2_Linear_16_C,
@@ -700,7 +700,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
+#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
 SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
          ScaleRowUp2_Linear_16_SSE2,
          ScaleRowUp2_Linear_16_C,
@@ -708,7 +708,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
 SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
          ScaleRowUp2_Linear_AVX2,
          ScaleRowUp2_Linear_C,
@@ -716,7 +716,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_AVX2,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
 SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
          ScaleRowUp2_Linear_12_AVX2,
          ScaleRowUp2_Linear_16_C,
@@ -724,7 +724,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
 SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
          ScaleRowUp2_Linear_16_AVX2,
          ScaleRowUp2_Linear_16_C,
@@ -732,7 +732,7 @@ SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_NEON
+#ifdef HAS_SCALEROWUP2_LINEAR_NEON
 SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
          ScaleRowUp2_Linear_NEON,
          ScaleRowUp2_Linear_C,
@@ -740,7 +740,7 @@ SUH2LANY(ScaleRowUp2_Linear_Any_NEON,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_NEON
+#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
 SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
          ScaleRowUp2_Linear_12_NEON,
          ScaleRowUp2_Linear_16_C,
@@ -748,7 +748,7 @@ SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
 SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON,
          ScaleRowUp2_Linear_16_NEON,
          ScaleRowUp2_Linear_16_C,
@@ -796,7 +796,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C,
          0,
          uint16_t)
 
-#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
+#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
 SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
          ScaleRowUp2_Bilinear_SSE2,
          ScaleRowUp2_Bilinear_C,
@@ -804,7 +804,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
          ScaleRowUp2_Bilinear_12_SSSE3,
          ScaleRowUp2_Bilinear_16_C,
@@ -812,7 +812,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
          ScaleRowUp2_Bilinear_16_SSE2,
          ScaleRowUp2_Bilinear_16_C,
@@ -820,7 +820,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
 SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
          ScaleRowUp2_Bilinear_SSSE3,
          ScaleRowUp2_Bilinear_C,
@@ -828,7 +828,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
 SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
          ScaleRowUp2_Bilinear_AVX2,
          ScaleRowUp2_Bilinear_C,
@@ -836,7 +836,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
          ScaleRowUp2_Bilinear_12_AVX2,
          ScaleRowUp2_Bilinear_16_C,
@@ -844,7 +844,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
          ScaleRowUp2_Bilinear_16_AVX2,
          ScaleRowUp2_Bilinear_16_C,
@@ -852,7 +852,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_NEON
+#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
 SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
          ScaleRowUp2_Bilinear_NEON,
          ScaleRowUp2_Bilinear_C,
@@ -860,7 +860,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON,
          uint8_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_NEON
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
          ScaleRowUp2_Bilinear_12_NEON,
          ScaleRowUp2_Bilinear_16_C,
@@ -868,7 +868,7 @@ SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON,
          uint16_t)
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON,
          ScaleRowUp2_Bilinear_16_NEON,
          ScaleRowUp2_Bilinear_16_C,
@@ -908,7 +908,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C,
           0,
           uint16_t)
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
 SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
           ScaleUVRowUp2_Linear_SSSE3,
           ScaleUVRowUp2_Linear_C,
@@ -916,7 +916,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3,
           uint8_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
 SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
           ScaleUVRowUp2_Linear_AVX2,
           ScaleUVRowUp2_Linear_C,
@@ -924,7 +924,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2,
           uint8_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
           ScaleUVRowUp2_Linear_16_SSE41,
           ScaleUVRowUp2_Linear_16_C,
@@ -932,7 +932,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41,
           uint16_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
           ScaleUVRowUp2_Linear_16_AVX2,
           ScaleUVRowUp2_Linear_16_C,
@@ -940,7 +940,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2,
           uint16_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_NEON
 SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
           ScaleUVRowUp2_Linear_NEON,
           ScaleUVRowUp2_Linear_C,
@@ -948,7 +948,7 @@ SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON,
           uint8_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON,
           ScaleUVRowUp2_Linear_16_NEON,
           ScaleUVRowUp2_Linear_16_C,
@@ -1006,7 +1006,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C,
           0,
           uint16_t)
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
           ScaleUVRowUp2_Bilinear_SSSE3,
           ScaleUVRowUp2_Bilinear_C,
@@ -1014,7 +1014,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3,
           uint8_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
           ScaleUVRowUp2_Bilinear_AVX2,
           ScaleUVRowUp2_Bilinear_C,
@@ -1022,7 +1022,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2,
           uint8_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
           ScaleUVRowUp2_Bilinear_16_SSE41,
           ScaleUVRowUp2_Bilinear_16_C,
@@ -1030,7 +1030,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41,
           uint16_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
           ScaleUVRowUp2_Bilinear_16_AVX2,
           ScaleUVRowUp2_Bilinear_16_C,
@@ -1038,7 +1038,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2,
           uint16_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON
 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
           ScaleUVRowUp2_Bilinear_NEON,
           ScaleUVRowUp2_Bilinear_C,
@@ -1046,7 +1046,7 @@ SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON,
           uint8_t)
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON,
           ScaleUVRowUp2_Bilinear_16_NEON,
           ScaleUVRowUp2_Bilinear_16_C,
diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc
index d827c0e7f..0bf7ddf7b 100644
--- a/source/scale_gcc.cc
+++ b/source/scale_gcc.cc
@@ -779,7 +779,7 @@ static const uvec8 kLinearShuffleFar = {2, 3, 0, 1, 6, 7, 4, 5,
 static const uvec8 kLinearMadd31 = {3, 1, 1, 3, 3, 1, 1, 3,
                                     3, 1, 1, 3, 3, 1, 1, 3};
 
-#ifdef HAS_SCALEROWUP2LINEAR_SSE2
+#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
 void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
                              uint8_t* dst_ptr,
                              int dst_width) {
@@ -833,7 +833,7 @@ void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_SSE2
+#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
 void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint8_t* dst_ptr,
@@ -949,7 +949,7 @@ void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3
+#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
 void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
                                  uint16_t* dst_ptr,
                                  int dst_width) {
@@ -999,7 +999,7 @@ void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
 void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
                                    ptrdiff_t src_stride,
                                    uint16_t* dst_ptr,
@@ -1098,7 +1098,7 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_SSE2
+#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
 void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
                                 uint16_t* dst_ptr,
                                 int dst_width) {
@@ -1149,7 +1149,7 @@ void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
 void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
                                   ptrdiff_t src_stride,
                                   uint16_t* dst_ptr,
@@ -1254,11 +1254,12 @@ void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
         "+r"(dst_width)               // %2
       : "r"((intptr_t)(src_stride)),  // %3
         "r"((intptr_t)(dst_stride))   // %4
-      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+        "xmm7");
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
 void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
                               uint8_t* dst_ptr,
                               int dst_width) {
@@ -1297,7 +1298,7 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
 void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst_ptr,
@@ -1384,7 +1385,7 @@ void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
 void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
                              uint8_t* dst_ptr,
                              int dst_width) {
@@ -1426,7 +1427,7 @@ void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
 void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint8_t* dst_ptr,
@@ -1510,7 +1511,7 @@ void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_12_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
 void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
                                 uint16_t* dst_ptr,
                                 int dst_width) {
@@ -1560,7 +1561,7 @@ void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
 void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
                                   ptrdiff_t src_stride,
                                   uint16_t* dst_ptr,
@@ -1624,7 +1625,7 @@ void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
 void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
                                 uint16_t* dst_ptr,
                                 int dst_width) {
@@ -1672,7 +1673,7 @@ void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
 void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
                                   ptrdiff_t src_stride,
                                   uint16_t* dst_ptr,
@@ -2325,13 +2326,18 @@ int FixedDiv1_X86(int num, int div) {
   return num;
 }
 
-#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
+#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) || \
+    defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
+
 // Shuffle table for splitting UV into upper and lower part of register.
 static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
                                       1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
 static const uvec8 kShuffleMergeUV = {0u,   8u,   2u,   10u,  4u,   12u,
                                       6u,   14u,  0x80, 0x80, 0x80, 0x80,
                                       0x80, 0x80, 0x80, 0x80};
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
 
 void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
                               ptrdiff_t src_stride,
@@ -2417,7 +2423,7 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
 static const uvec8 kUVLinearMadd31 = {3, 1, 3, 1, 1, 3, 1, 3,
                                       3, 1, 3, 1, 1, 3, 1, 3};
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
 void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
                                 uint8_t* dst_ptr,
                                 int dst_width) {
@@ -2456,7 +2462,7 @@ void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
 void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
                                   ptrdiff_t src_stride,
                                   uint8_t* dst_ptr,
@@ -2541,7 +2547,7 @@ void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
 
 void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
                                uint8_t* dst_ptr,
@@ -2583,7 +2589,7 @@ void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
 void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
                                  ptrdiff_t src_stride,
                                  uint8_t* dst_ptr,
@@ -2665,7 +2671,7 @@ void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
 void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
                                    uint16_t* dst_ptr,
                                    int dst_width) {
@@ -2715,7 +2721,7 @@ void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
 void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
                                      ptrdiff_t src_stride,
                                      uint16_t* dst_ptr,
@@ -2808,7 +2814,7 @@ void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
 void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
                                   uint16_t* dst_ptr,
                                   int dst_width) {
@@ -2855,7 +2861,7 @@ void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
 }
 #endif
 
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
 void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
                                     ptrdiff_t src_stride,
                                     uint16_t* dst_ptr,
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 42166d0d9..3a8c470ba 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -1673,97 +1673,45 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
   free_aligned_buffer_page_end(dst_opt);
 }
 
-TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
+// Compares DetileSplitUV to 2 step Detile + SplitUV
+TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
   int i, j;
 
   // orig is tiled. Allocate enough memory for tiles.
   int orig_width = (benchmark_width_ + 15) & ~15;
   int orig_height = (benchmark_height_ + 15) & ~15;
   int orig_plane_size = orig_width * orig_height;
-  int u_plane_size = benchmark_width_ * benchmark_height_;
-  int v_plane_size = u_plane_size;
-  align_buffer_page_end(orig_uv, orig_plane_size);
-  align_buffer_page_end(dst_u_c, u_plane_size);
-  align_buffer_page_end(dst_u_opt, u_plane_size);
-  align_buffer_page_end(dst_v_c, v_plane_size);
-  align_buffer_page_end(dst_v_opt, v_plane_size);
-
-  MemRandomize(orig_uv, orig_plane_size);
-  memset(dst_u_c, 0, u_plane_size);
-  memset(dst_u_opt, 0, u_plane_size);
-  memset(dst_v_c, 0, v_plane_size);
-  memset(dst_v_opt, 0, v_plane_size);
-
-  // Disable all optimizations.
-  MaskCpuFlags(disable_cpu_flags_);
-  for (j = 0; j < benchmark_iterations_; j++) {
-    DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
-                       dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
-                       benchmark_height_, 16);
-  }
-
-  // Enable optimizations.
-  MaskCpuFlags(benchmark_cpu_info_);
-  for (j = 0; j < benchmark_iterations_; j++) {
-    DetileSplitUVPlane(
-        orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
-        (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
-  }
-
-  for (i = 0; i < u_plane_size; ++i) {
-    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
-  }
-  for (i = 0; i < v_plane_size; ++i) {
-    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
-  }
-
-  free_aligned_buffer_page_end(orig_uv);
-  free_aligned_buffer_page_end(dst_u_c);
-  free_aligned_buffer_page_end(dst_u_opt);
-  free_aligned_buffer_page_end(dst_v_c);
-  free_aligned_buffer_page_end(dst_v_opt);
-}
-
-// TODO(b/228518489): Fix Segmentation fault in this test
-TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) {
-  int i, j;
-
-  // orig is tiled. Allocate enough memory for tiles.
-  int orig_width = (benchmark_width_ + 15) & ~15;
-  int orig_height = (benchmark_height_ + 15) & ~15;
-  int orig_plane_size = orig_width * orig_height;
-  int u_plane_size = benchmark_width_ * benchmark_height_;
-  int v_plane_size = u_plane_size;
+  int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
   align_buffer_page_end(orig_uv, orig_plane_size);
   align_buffer_page_end(detiled_uv, orig_plane_size);
-  align_buffer_page_end(dst_u_two_stage, u_plane_size);
-  align_buffer_page_end(dst_u_opt, u_plane_size);
-  align_buffer_page_end(dst_v_two_stage, v_plane_size);
-  align_buffer_page_end(dst_v_opt, v_plane_size);
+  align_buffer_page_end(dst_u_two_stage, uv_plane_size);
+  align_buffer_page_end(dst_u_opt, uv_plane_size);
+  align_buffer_page_end(dst_v_two_stage, uv_plane_size);
+  align_buffer_page_end(dst_v_opt, uv_plane_size);
 
   MemRandomize(orig_uv, orig_plane_size);
   memset(detiled_uv, 0, orig_plane_size);
-  memset(dst_u_two_stage, 0, u_plane_size);
-  memset(dst_u_opt, 0, u_plane_size);
-  memset(dst_v_two_stage, 0, v_plane_size);
-  memset(dst_v_opt, 0, v_plane_size);
+  memset(dst_u_two_stage, 0, uv_plane_size);
+  memset(dst_u_opt, 0, uv_plane_size);
+  memset(dst_v_two_stage, 0, uv_plane_size);
+  memset(dst_v_opt, 0, uv_plane_size);
 
+  DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+                     dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
+                     benchmark_height_, 16);
+
+  // Benchmark 2 step conversion for comparison.
   for (j = 0; j < benchmark_iterations_; j++) {
-    DetileSplitUVPlane(
-        orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
-        (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
+    DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+                benchmark_width_, benchmark_height_, 16);
+    SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+                 (benchmark_width_ + 1) / 2, dst_v_two_stage,
+                 (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
+                 benchmark_height_);
   }
 
-  DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
-              benchmark_width_, benchmark_height_, 16);
-  SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
-               (benchmark_width_ + 1) / 2, dst_v_two_stage,
-               (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_);
-
-  for (i = 0; i < u_plane_size; ++i) {
+  for (i = 0; i < uv_plane_size; ++i) {
     EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
-  }
-  for (i = 0; i < v_plane_size; ++i) {
     EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
   }
 
@@ -1775,6 +1723,54 @@ TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) {
   free_aligned_buffer_page_end(dst_v_opt);
 }
 
+TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
+  int i, j;
+
+  // orig is tiled. Allocate enough memory for tiles.
+  int orig_width = (benchmark_width_ + 15) & ~15;
+  int orig_height = (benchmark_height_ + 15) & ~15;
+  int orig_plane_size = orig_width * orig_height;
+  int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
+  align_buffer_page_end(orig_uv, orig_plane_size);
+  align_buffer_page_end(dst_u_c, uv_plane_size);
+  align_buffer_page_end(dst_u_opt, uv_plane_size);
+  align_buffer_page_end(dst_v_c, uv_plane_size);
+  align_buffer_page_end(dst_v_opt, uv_plane_size);
+
+  MemRandomize(orig_uv, orig_plane_size);
+  memset(dst_u_c, 0, uv_plane_size);
+  memset(dst_u_opt, 0, uv_plane_size);
+  memset(dst_v_c, 0, uv_plane_size);
+  memset(dst_v_opt, 0, uv_plane_size);
+
+  // Disable all optimizations.
+  MaskCpuFlags(disable_cpu_flags_);
+
+  DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+                     dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
+                     benchmark_height_, 16);
+
+  // Enable optimizations.
+  MaskCpuFlags(benchmark_cpu_info_);
+
+  for (j = 0; j < benchmark_iterations_; j++) {
+    DetileSplitUVPlane(
+        orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+        (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
+  }
+
+  for (i = 0; i < uv_plane_size; ++i) {
+    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
+    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
+  }
+
+  free_aligned_buffer_page_end(orig_uv);
+  free_aligned_buffer_page_end(dst_u_c);
+  free_aligned_buffer_page_end(dst_u_opt);
+  free_aligned_buffer_page_end(dst_v_c);
+  free_aligned_buffer_page_end(dst_v_opt);
+}
+
 static int TestMultiply(int width, int height, int benchmark_iterations,
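
Most of this change is a mechanical rename: the HAS_SCALEROWUP2LINEAR_* / HAS_SCALEROWUP2BILINEAR_* family gains an underscore (HAS_SCALEROWUP2_LINEAR_*, HAS_SCALEROWUP2_BILINEAR_*) so each guard macro reads the same as the ScaleRowUp2_Linear_* / ScaleRowUp2_Bilinear_* kernels it enables, and HAS_SCALEROWUP2_BILINEAR_16_SSE2 is commented out pending libyuv:882. Every consumer of these macros follows the same compile-time-plus-runtime dispatch shape. The sketch below is a standalone model of that pattern, not libyuv code: the TestCpuFlag stub, its flag value, and the row-function stubs are invented here so the snippet compiles on its own; the real selection logic is in ScalePlaneUp2_Linear in source/scale.cc.

// Standalone model of libyuv's row-function dispatch. All definitions here
// are hypothetical stubs; only the #ifdef/TestCpuFlag structure mirrors
// source/scale.cc.
#include <cstdint>
#include <cstdio>

enum { kCpuHasSSE2 = 0x10 };  // stand-in flag bit, not libyuv's real value
static int TestCpuFlag(int flag) {
  return flag;  // pretend the CPU supports everything
}

typedef void (*ScaleRowUpFn)(const uint8_t* src, uint8_t* dst, int width);

static void ScaleRowUp2_Linear_Any_C(const uint8_t*, uint8_t*, int) {
  std::puts("C fallback row function");
}
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
static void ScaleRowUp2_Linear_Any_SSE2(const uint8_t*, uint8_t*, int) {
  std::puts("SSE2 row function");
}
#endif

int main() {
  // Start from the portable C row function...
  ScaleRowUpFn ScaleRowUp = ScaleRowUp2_Linear_Any_C;
  // ...then upgrade only if the kernel was compiled in (HAS_ macro defined)
  // AND the CPU reports support at runtime. A commented-out macro, like the
  // disabled HAS_SCALEROWUP2_BILINEAR_16_SSE2 above, makes the whole block
  // compile away, so the C path keeps running with no other source changes.
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
  }
#endif
  ScaleRowUp(nullptr, nullptr, 0);
  return 0;
}

Compiled plainly this prints the C line; compiled with -DHAS_SCALEROWUP2_LINEAR_SSE2 it takes the SSE2 branch. That is exactly why commenting out the 16-bit SSE2 bilinear macro is a safe way to route around the broken kernel while it is fixed.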
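The other substantive change is DetileSplitUVRow_C in source/row_common.cc, which now reuses SplitUVRow_C on each 16-byte tile row instead of a hand-rolled pixel loop, and moves below SplitUVRow_C so the call resolves. Below is a self-contained sketch of that logic, with SplitUVRow_C's de-interleave loop reproduced inline as an assumed equivalent; the main() harness, its two-tile 16x16 geometry, and the 24-byte width are illustrative choices, not part of the library.

// Self-contained sketch of the rewritten DetileSplitUVRow_C path.
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// De-interleave `width` UV pairs (assumed equivalent of libyuv's SplitUVRow_C).
static void SplitUVRow_C(const uint8_t* src_uv,
                         uint8_t* dst_u,
                         uint8_t* dst_v,
                         int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[0];
    dst_v[x] = src_uv[1];
    src_uv += 2;
  }
}

// As in the patch: each full 16-byte tile row is split as 8 UV pairs, then
// the leftover (width & 15) bytes are split as ((width & 15) + 1) / 2 pairs.
static void DetileSplitUVRow_C(const uint8_t* src_uv,
                               ptrdiff_t src_tile_stride,
                               uint8_t* dst_u,
                               uint8_t* dst_v,
                               int width) {
  int x;
  for (x = 0; x < width - 15; x += 16) {
    SplitUVRow_C(src_uv, dst_u, dst_v, 8);
    dst_u += 8;
    dst_v += 8;
    src_uv += src_tile_stride;
  }
  if (width & 15) {
    SplitUVRow_C(src_uv, dst_u, dst_v, ((width & 15) + 1) / 2);
  }
}

int main(void) {
  // One row of tiled NV12-style UV bytes spanning two 16x16 tiles, so
  // horizontally adjacent tiles are 16 * 16 = 256 bytes apart.
  enum { kTileStride = 16 * 16, kWidth = 24 };  // 24 UV bytes = 12 U + 12 V
  uint8_t src[2 * kTileStride];
  for (int i = 0; i < (int)sizeof(src); ++i) {
    src[i] = (uint8_t)i;
  }
  uint8_t u[kWidth / 2];
  uint8_t v[kWidth / 2];
  DetileSplitUVRow_C(src, kTileStride, u, v, kWidth);
  // The first 8 pairs come from tile 0, the remaining 4 from tile 1.
  assert(u[0] == src[0] && v[0] == src[1]);
  assert(u[8] == src[kTileStride] && v[8] == src[kTileStride + 1]);
  printf("detile+split ok\n");
  return 0;
}

One behavioral nuance: the removed remainder loop iterated (width & 0xF) / 2 times and so dropped a trailing odd byte, while the new ((width & 15) + 1) / 2 rounds up and keeps that last U sample, consistent with how the callers in this patch size UV planes with (benchmark_width_ + 1) / 2.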