diff --git a/source/convert.cc b/source/convert.cc index c96cff9d9..e444d574a 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -544,10 +544,12 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } @@ -839,34 +841,30 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, } } #endif -#if defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#if defined(HAS_ARGBTOYROW_AVX2) +#endif +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif - { #if !defined(HAS_RGB24TOYROW_NEON) + { // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; + const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif @@ -899,8 +897,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, } #if !defined(HAS_RGB24TOYROW_NEON) free_aligned_buffer_64(row); -#endif } +#endif return 0; } @@ -960,66 +958,64 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, } } #endif -#if defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#if defined(HAS_ARGBTOYROW_AVX2) +#endif +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if !defined(HAS_RAWTOYROW_NEON) { // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; + const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); +#endif for (y = 0; y < height - 1; y += 2) { - #if defined(HAS_RAWTOYROW_NEON) +#if defined(HAS_RAWTOYROW_NEON) RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); - #else +#else RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); - #endif +#endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { - #if defined(HAS_RAWTOYROW_NEON) +#if defined(HAS_RAWTOYROW_NEON) RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); - #else +#else RAWToARGBRow(src_raw, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - #endif +#endif } - #if !defined(HAS_RAWTOYROW_NEON) +#if !defined(HAS_RAWTOYROW_NEON) free_aligned_buffer_64(row); - #endif } +#endif return 0; } @@ -1083,35 +1079,30 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, } } #endif -#if defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif - - { #if !defined(HAS_RGB565TOYROW_NEON) + { // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; + const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif @@ -1144,8 +1135,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, } #if !defined(HAS_RGB565TOYROW_NEON) free_aligned_buffer_64(row); -#endif } +#endif return 0; } @@ -1202,35 +1193,30 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, } } #endif -#if defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif - - { #if !defined(HAS_ARGB1555TOYROW_NEON) + { // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; + const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { @@ -1263,9 +1249,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, #endif } #if !defined(HAS_ARGB1555TOYROW_NEON) - free_aligned_buffer_64(row); -#endif + free_aligned_buffer_64(row); } +#endif return 0; } @@ -1322,35 +1308,31 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, } } #endif -#if defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif - { #if !defined(HAS_ARGB4444TOYROW_NEON) + { // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; + const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif @@ -1385,8 +1367,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, } #if !defined(HAS_ARGB4444TOYROW_NEON) free_aligned_buffer_64(row); -#endif } +#endif return 0; } diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index a5fea893b..700c59902 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -289,10 +289,12 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } @@ -339,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, #endif { // Allocate a rows of uv. - align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); - uint8* row_v = row_u + ((halfwidth + 15) & ~15); + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -396,10 +398,12 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } @@ -446,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, #endif { // Allocate a rows of uv. - align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); - uint8* row_v = row_u + ((halfwidth + 15) & ~15); + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -1088,7 +1092,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = ARGBToYJRow_C; if (!src_argb || @@ -1112,7 +1116,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b8314c70c..08b9a215b 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -2040,7 +2040,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, #endif { // 3 rows with edges before/after. - const int kRowSize = (width + kEdge + 15) & ~15; + const int kRowSize = (width + kEdge + 31) & ~31; align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); uint8* row_sobelx = rows; uint8* row_sobely = rows + kRowSize; diff --git a/source/row_posix.cc b/source/row_posix.cc index c42885696..8d90df638 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -953,7 +953,6 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_SSSE3 -// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3. void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { asm volatile ( diff --git a/source/row_win.cc b/source/row_win.cc index e34bdf70c..4531f9e5b 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -1161,7 +1161,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { } #endif // HAS_ARGBTOYROW_AVX2 -#ifdef HAS_ARGBTOYROW_AVX2 +#ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. __declspec(naked) __declspec(align(32)) void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { diff --git a/source/scale.cc b/source/scale.cc index 963599075..761c79a4d 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1164,7 +1164,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height, const uint8* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 15) & ~15; + const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 2); uint8* rowptr = row; @@ -1295,7 +1295,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, const uint16* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. - const int kRowSize = (dst_width + 15) & ~15; + const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 4); uint16* rowptr = (uint16*)row; diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 6e019bf9b..67aa0495a 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -87,7 +87,7 @@ static void ScaleARGBDown4Box(int src_width, int src_height, int x, int dx, int y, int dy) { int j; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; + const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, @@ -353,7 +353,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, const uint8* src = src_argb + yi * src_stride; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 15) & ~15; + const int kRowSize = (dst_width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); uint8* rowptr = row; @@ -538,7 +538,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, const uint8* src_row_v = src_v + uv_yi * src_stride_v; // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 15) & ~15; + const int kRowSize = (dst_width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); // Allocate 1 row of ARGB for source conversion.