diff --git a/README.chromium b/README.chromium index f621b8893..5b4f2b808 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 230 +Version: 231 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 081edbd03..04283c8fa 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,7 +11,7 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define INCLUDE_LIBYUV_VERSION 230 +#define INCLUDE_LIBYUV_VERSION 231 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index b213ed0b3..e07970d80 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -78,7 +78,7 @@ static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, movdqa xmm0, [eax] pavgb xmm0, [eax + edx] sub ecx, 16 - movdqa [eax + edi], xmm0 // NOLINT + movdqa [eax + edi], xmm0 lea eax, [eax + 16] jg convertloop pop edi @@ -475,7 +475,7 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, pand xmm2, xmm5 // even bytes are Y pand xmm3, xmm5 packuswb xmm2, xmm3 - movdqa [edx], xmm2 // NOLINT + movdqa [edx], xmm2 lea edx, [edx + 16] psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 @@ -483,12 +483,12 @@ static void SplitYUY2_SSE2(const uint8* src_yuy2, movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - movq qword ptr [esi], xmm0 // NOLINT + movq qword ptr [esi], xmm0 lea esi, [esi + 8] psrlw xmm1, 8 // V packuswb xmm1, xmm1 sub ecx, 16 - movq qword ptr [edi], xmm1 // NOLINT + movq qword ptr [edi], xmm1 lea edi, [edi + 8] jg convertloop diff --git a/source/convert_from.cc b/source/convert_from.cc index dc51a4fe7..24156892b 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -283,7 +283,7 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y, ret } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) #define HAS_I42XTOYUY2ROW_SSE2 static void I42xToYUY2Row_SSE2(const uint8* src_y, const uint8* src_u, @@ -355,7 +355,9 @@ static void I42xToUYVYRow_SSE2(const uint8* src_y, } #endif -static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, +static void I42xToYUY2Row_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width) { for (int x = 0; x < width - 1; x += 2) { dst_frame[0] = src_y[0]; @@ -375,7 +377,9 @@ static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* } } -static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, +static void I42xToUYVYRow_C(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, uint8* dst_frame, int width) { for (int x = 0; x < width - 1; x += 2) { dst_frame[0] = src_u[0]; @@ -406,7 +410,7 @@ static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* #endif #ifdef LIBYUV_LITTLE_ENDIAN -#define WRITEWORD(p, v) (*((uint32*) (p))) = v +#define WRITEWORD(p, v) *reinterpret_cast(p) = v #else static inline void WRITEWORD(uint8* p, uint32 v) { @@ -453,8 +457,8 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, dst_stride_frame = -dst_stride_frame; } void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToYUY2Row = I42xToYUY2Row_C; + const uint8* src_v, uint8* dst_frame, int width) = + I42xToYUY2Row_C; #if defined(HAS_I42XTOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && @@ -489,8 +493,8 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, dst_stride_frame = -dst_stride_frame; } void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToYUY2Row = I42xToYUY2Row_C; + const uint8* src_v, uint8* dst_frame, int width) = + I42xToYUY2Row_C; #if defined(HAS_I42XTOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && @@ -531,8 +535,8 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, dst_stride_frame = -dst_stride_frame; } void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToUYVYRow = I42xToUYVYRow_C; + const uint8* src_v, uint8* dst_frame, int width) = + I42xToUYVYRow_C; #if defined(HAS_I42XTOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && @@ -567,8 +571,8 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, dst_stride_frame = -dst_stride_frame; } void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToUYVYRow = I42xToUYVYRow_C; + const uint8* src_v, uint8* dst_frame, int width) = + I42xToUYVYRow_C; #if defined(HAS_I42XTOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && @@ -598,7 +602,7 @@ int I420ToV210(const uint8* src_y, int src_stride_y, const uint8* src_v, int src_stride_v, uint8* dst_frame, int dst_stride_frame, int width, int height) { - if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required + if (width * 16 / 6 > kMaxStride || // Row buffer of V210 is required. src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { return -1; } @@ -614,8 +618,8 @@ int I420ToV210(const uint8* src_y, int src_stride_y, UYVYToV210Row = UYVYToV210Row_C; void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_frame, int width); - I42xToUYVYRow = I42xToUYVYRow_C; + const uint8* src_v, uint8* dst_frame, int width) = + I42xToUYVYRow_C; #if defined(HAS_I42XTOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && @@ -655,17 +659,17 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 16)) { I420ToARGBRow = I420ToARGBRow_NEON; } - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_Any_SSSE3; @@ -673,11 +677,9 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { I420ToARGBRow = I420ToARGBRow_SSSE3; } - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif + for (int y = 0; y < height; ++y) { I420ToARGBRow(src_y, src_u, src_v, dst_argb, width); dst_argb += dst_stride_argb; @@ -703,17 +705,17 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, dst_stride_bgra = -dst_stride_bgra; } void (*I420ToBGRARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToBGRARow_C; #if defined(HAS_I420TOBGRAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToBGRARow = I420ToBGRARow_Any_NEON; if (IS_ALIGNED(width, 16)) { I420ToBGRARow = I420ToBGRARow_NEON; } - } else + } #elif defined(HAS_I420TOBGRAROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToBGRARow = I420ToBGRARow_Any_SSSE3; @@ -721,11 +723,9 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { I420ToBGRARow = I420ToBGRARow_SSSE3; } - } else -#endif - { - I420ToBGRARow = I420ToBGRARow_C; } +#endif + for (int y = 0; y < height; ++y) { I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width); dst_bgra += dst_stride_bgra; @@ -751,17 +751,17 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, dst_stride_abgr = -dst_stride_abgr; } void (*I420ToABGRRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToABGRRow_C; #if defined(HAS_I420TOABGRROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToABGRRow = I420ToABGRRow_Any_NEON; if (IS_ALIGNED(width, 16)) { I420ToABGRRow = I420ToABGRRow_NEON; } - } else + } #elif defined(HAS_I420TOABGRROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToABGRRow = I420ToABGRRow_Any_SSSE3; @@ -769,11 +769,9 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { I420ToABGRRow = I420ToABGRRow_SSSE3; } - } else -#endif - { - I420ToABGRRow = I420ToABGRRow_C; } +#endif + for (int y = 0; y < height; ++y) { I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width); dst_abgr += dst_stride_abgr; @@ -799,25 +797,23 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_NEON; - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix); + void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = + ARGBToRGB24Row_C; #if defined(HAS_ARGBTORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; @@ -825,11 +821,8 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; } - } else -#endif - { - ARGBToRGB24Row = ARGBToRGB24Row_C; } +#endif for (int y = 0; y < height; ++y) { I420ToARGBRow(src_y, src_u, src_v, row, width); @@ -857,25 +850,23 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_NEON; - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix); + void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) = + ARGBToRAWRow_C; #if defined(HAS_ARGBTORAWROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; @@ -883,11 +874,8 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { ARGBToRAWRow = ARGBToRAWRow_SSSE3; } - } else -#endif - { - ARGBToRAWRow = ARGBToRAWRow_C; } +#endif for (int y = 0; y < height; ++y) { I420ToARGBRow(src_y, src_u, src_v, row, width); @@ -915,36 +903,31 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, dst_stride_rgb = -dst_stride_rgb; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_NEON; - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix); + void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix) = + ARGBToRGB565Row_C; #if defined(HAS_ARGBTORGB565ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToRGB565Row = ARGBToRGB565Row_SSE2; } - } else -#endif - { - ARGBToRGB565Row = ARGBToRGB565Row_C; } +#endif for (int y = 0; y < height; ++y) { I420ToARGBRow(src_y, src_u, src_v, row, width); @@ -972,36 +955,31 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_NEON; - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix); + void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = + ARGBToARGB1555Row_C; #if defined(HAS_ARGBTOARGB1555ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; } - } else -#endif - { - ARGBToARGB1555Row = ARGBToARGB1555Row_C; } +#endif for (int y = 0; y < height; ++y) { I420ToARGBRow(src_y, src_u, src_v, row, width); @@ -1029,36 +1007,31 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_NEON; - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix); + void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = + ARGBToARGB4444Row_C; #if defined(HAS_ARGBTOARGB4444ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; } - } else -#endif - { - ARGBToARGB4444Row = ARGBToARGB4444Row_C; } +#endif for (int y = 0; y < height; ++y) { I420ToARGBRow(src_y, src_u, src_v, row, width); @@ -1080,7 +1053,6 @@ int ConvertFromI420(const uint8* y, int y_stride, uint8* dst_sample, int dst_sample_stride, int width, int height, uint32 format) { - if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) { return -1; } diff --git a/source/format_conversion.cc b/source/format_conversion.cc index 7dfbfdb84..46d7e7e23 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -48,7 +48,7 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb, } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) #define HAS_ARGBTOBAYERROW_SSSE3 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, @@ -126,7 +126,7 @@ static int MakeSelectors(const int blue_index, index_map[1] = GenerateSelector(blue_index, green_index); break; default: - return -1; // Bad FourCC + return -1; // Bad FourCC } return 0; } @@ -141,25 +141,22 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } - void (*ARGBToBayerRow)(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); + void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, + uint32 selector, int pix) = ARGBToBayerRow_C; #if defined(HAS_ARGBTOBAYERROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { ARGBToBayerRow = ARGBToBayerRow_SSSE3; - } else -#endif - { - ARGBToBayerRow = ARGBToBayerRow_C; } +#endif const int blue_index = 0; // Offsets for ARGB format const int green_index = 1; const int red_index = 2; uint32 index_map[2]; if (MakeSelectors(blue_index, green_index, red_index, dst_fourcc_bayer, index_map)) { - return -1; // Bad FourCC + return -1; // Bad FourCC } for (int y = 0; y < height; ++y) { @@ -170,7 +167,7 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb, return 0; } -#define AVG(a,b) (((a) + (b)) >> 1) +#define AVG(a, b) (((a) + (b)) >> 1) static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer, uint8* dst_argb, int pix) { @@ -369,9 +366,10 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, uint8* dst_argb, int pix); void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, uint8* dst_argb, int pix); - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = + ARGBToYRow_C; void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; SIMD_ALIGNED(uint8 row[kMaxStride * 2]); #if defined(HAS_ARGBTOYROW_SSSE3) @@ -379,19 +377,13 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, IS_ALIGNED(width, 16) && IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; - } else -#endif - { - ARGBToYRow = ARGBToYRow_C; } +#endif #if defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - } else -#endif - { - ARGBToUVRow = ARGBToUVRow_C; } +#endif switch (src_fourcc_bayer) { case FOURCC_BGGR: @@ -411,7 +403,7 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, BayerRow1 = BayerRowGB; break; default: - return -1; // Bad FourCC + return -1; // Bad FourCC } for (int y = 0; y < height - 1; y += 2) { @@ -453,33 +445,27 @@ int I420ToBayer(const uint8* src_y, int src_stride_y, src_stride_v = -src_stride_v; } void (*I420ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I420ToARGBRow_C; #if defined(HAS_I420TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I420ToARGBRow = I420ToARGBRow_NEON; - } else + } #elif defined(HAS_I420TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I420ToARGBRow = I420ToARGBRow_SSSE3; - } else -#endif - { - I420ToARGBRow = I420ToARGBRow_C; } +#endif SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToBayerRow)(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix); + void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, + uint32 selector, int pix) = ARGBToBayerRow_C; #if defined(HAS_ARGBTOBAYERROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { ARGBToBayerRow = ARGBToBayerRow_SSSE3; - } else -#endif - { - ARGBToBayerRow = ARGBToBayerRow_C; } +#endif const int blue_index = 0; // Offsets for ARGB format const int green_index = 1; const int red_index = 2; diff --git a/source/rotate.cc b/source/rotate.cc index 4b11683c2..f5f9075c3 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -29,7 +29,7 @@ extern "C" { ".private_extern _" #name " \n" \ ".align 4,0x90 \n" \ "_" #name ": \n" -#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__) +#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__) #define DECLARE_FUNCTION(name) \ ".text \n" \ ".align 4,0x90 \n" \ @@ -42,16 +42,6 @@ extern "C" { #endif #endif -typedef void (*mirror_uv_func)(const uint8*, uint8*, uint8*, int); -typedef void (*rotate_uv_wx8_func)(const uint8*, int, - uint8*, int, - uint8*, int, int); -typedef void (*rotate_uv_wxh_func)(const uint8*, int, - uint8*, int, - uint8*, int, int, int); -typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int); -typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int); - #ifdef __ARM_NEON__ #define HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width); @@ -291,7 +281,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, ret } } -#elif (defined(__i386__) || defined(__x86_64__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__i386__) || defined(__x86_64__) && !defined(YUV_DISABLE_ASM) #define HAS_TRANSPOSE_WX8_SSSE3 static void TransposeWx8_SSSE3(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width) { @@ -501,7 +491,7 @@ extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, "pop %ebx \n" "ret \n" ); -#elif defined (__x86_64__) +#elif defined(__x86_64__) // 64 bit version has enough registers to do 16x8 to 8x16 at a time. #define HAS_TRANSPOSE_WX8_FAST_SSSE3 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride, @@ -781,45 +771,37 @@ static void TransposeWxH_C(const uint8* src, int src_stride, void TransposePlane(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width, int height) { - int i = height; - rotate_wx8_func TransposeWx8; - rotate_wxh_func TransposeWxH; - + void (*TransposeWx8)(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width) = TransposeWx8_C; #if defined(HAS_TRANSPOSE_WX8_NEON) if (TestCpuFlag(kCpuHasNEON)) { TransposeWx8 = TransposeWx8_NEON; - TransposeWxH = TransposeWxH_C; - } else + } +#endif +#if defined(HAS_TRANSPOSE_WX8_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { + TransposeWx8 = TransposeWx8_SSSE3; + } #endif #if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { TransposeWx8 = TransposeWx8_FAST_SSSE3; - TransposeWxH = TransposeWxH_C; - } else -#endif -#if defined(HAS_TRANSPOSE_WX8_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { - TransposeWx8 = TransposeWx8_SSSE3; - TransposeWxH = TransposeWxH_C; - } else -#endif - { - TransposeWx8 = TransposeWx8_C; - TransposeWxH = TransposeWxH_C; } +#endif - // work across the source in 8x8 tiles + // Work across the source in 8x8 tiles + int i = height; while (i >= 8) { TransposeWx8(src, src_stride, dst, dst_stride, width); - - src += 8 * src_stride; // go down 8 rows - dst += 8; // move over 8 columns - i -= 8; + src += 8 * src_stride; // Go down 8 rows. + dst += 8; // Move over 8 columns. + i -= 8; } - TransposeWxH(src, src_stride, dst, dst_stride, width, i); + TransposeWxH_C(src, src_stride, dst, dst_stride, width, i); } void RotatePlane90(const uint8* src, int src_stride, @@ -830,7 +812,6 @@ void RotatePlane90(const uint8* src, int src_stride, // of the buffer and flip the sign of the source stride. src += src_stride * (height - 1); src_stride = -src_stride; - TransposePlane(src, src_stride, dst, dst_stride, width, height); } @@ -842,26 +823,17 @@ void RotatePlane270(const uint8* src, int src_stride, // of the buffer and flip the sign of the destination stride. dst += dst_stride * (width - 1); dst_stride = -dst_stride; - TransposePlane(src, src_stride, dst, dst_stride, width, height); } void RotatePlane180(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width, int height) { - void (*MirrorRow)(const uint8* src, uint8* dst, int width); + void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; #if defined(HAS_MIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MirrorRow = MirrorRow_NEON; - } else -#endif -#if defined(HAS_MIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - MirrorRow = MirrorRow_SSSE3; - } else + } #endif #if defined(HAS_MIRRORROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && @@ -869,11 +841,16 @@ void RotatePlane180(const uint8* src, int src_stride, IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { MirrorRow = MirrorRow_SSE2; - } else -#endif - { - MirrorRow = MirrorRow_C; } +#endif +#if defined(HAS_MIRRORROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(width, 16) && + IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && + IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { + MirrorRow = MirrorRow_SSSE3; + } +#endif void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; #if defined(HAS_COPYROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { @@ -915,8 +892,7 @@ static void TransposeUVWx8_C(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width) { - int i; - for (i = 0; i < width; ++i) { + for (int i = 0; i < width; ++i) { dst_a[0] = src[0 * src_stride + 0]; dst_b[0] = src[0 * src_stride + 1]; dst_a[1] = src[1 * src_stride + 0]; @@ -943,9 +919,8 @@ static void TransposeUVWxH_C(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width, int height) { - int i, j; - for (i = 0; i < width * 2; i += 2) - for (j = 0; j < height; ++j) { + for (int i = 0; i < width * 2; i += 2) + for (int j = 0; j < height; ++j) { dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; } @@ -955,47 +930,39 @@ void TransposeUV(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width, int height) { - int i = height; - rotate_uv_wx8_func TransposeWx8; - rotate_uv_wxh_func TransposeWxH; - + void (*TransposeUVWx8)(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width) = TransposeUVWx8_C; #if defined(HAS_TRANSPOSE_UVWX8_NEON) if (TestCpuFlag(kCpuHasNEON)) { - TransposeWx8 = TransposeUVWx8_NEON; - TransposeWxH = TransposeUVWxH_C; - } else -#endif -#if defined(HAS_TRANSPOSE_UVWX8_SSE2) + TransposeUVWx8 = TransposeUVWx8_NEON; + } +#elif defined(HAS_TRANSPOSE_UVWX8_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - TransposeWx8 = TransposeUVWx8_SSE2; - TransposeWxH = TransposeUVWxH_C; - } else + TransposeUVWx8 = TransposeUVWx8_SSE2; + } #endif - { - TransposeWx8 = TransposeUVWx8_C; - TransposeWxH = TransposeUVWxH_C; - } - // work through the source in 8x8 tiles + // Work through the source in 8x8 tiles. + int i = height; while (i >= 8) { - TransposeWx8(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width); - - src += 8 * src_stride; // go down 8 rows - dst_a += 8; // move over 8 columns - dst_b += 8; // move over 8 columns - i -= 8; + TransposeUVWx8(src, src_stride, + dst_a, dst_stride_a, + dst_b, dst_stride_b, + width); + src += 8 * src_stride; // Go down 8 rows. + dst_a += 8; // Move over 8 columns. + dst_b += 8; // Move over 8 columns. + i -= 8; } - TransposeWxH(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, i); - + TransposeUVWxH_C(src, src_stride, + dst_a, dst_stride_a, + dst_b, dst_stride_b, + width, i); } void RotateUV90(const uint8* src, int src_stride, @@ -1031,29 +998,25 @@ void RotateUV180(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width, int height) { - mirror_uv_func MirrorRow; - + void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = + MirrorRowUV_C; #if defined(HAS_MIRRORROW_UV_NEON) if (TestCpuFlag(kCpuHasNEON)) { - MirrorRow = MirrorRowUV_NEON; - } else -#endif -#if defined(HAS_MIRRORROW_UV_SSSE3) + MirrorRowUV = MirrorRowUV_NEON; + } +#elif defined(HAS_MIRRORROW_UV_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - MirrorRow = MirrorRowUV_SSSE3; - } else -#endif - { - MirrorRow = MirrorRowUV_C; + MirrorRowUV = MirrorRowUV_SSSE3; } +#endif dst_a += dst_stride_a * (height - 1); dst_b += dst_stride_b * (height - 1); for (int i = 0; i < height; ++i) { - MirrorRow(src, dst_a, dst_b, width); + MirrorRowUV(src, dst_a, dst_b, width); src += src_stride; dst_a -= dst_stride_a; dst_b -= dst_stride_b; diff --git a/source/row_common.cc b/source/row_common.cc index bf4bc15fe..68b6682cf 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -471,7 +471,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) { dst_argb[3] = 255u; } else { *reinterpret_cast(dst_argb) = - *reinterpret_cast(src_argb); + *reinterpret_cast(src_argb); } } a = src_argb[4 + 3]; @@ -489,7 +489,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) { dst_argb[4 + 3] = 255u; } else { *reinterpret_cast(dst_argb + 4) = - *reinterpret_cast(src_argb + 4); + *reinterpret_cast(src_argb + 4); } } src_argb += 8; @@ -512,7 +512,7 @@ void ARGBBlendRow_C(const uint8* src_argb, uint8* dst_argb, int width) { dst_argb[3] = 255u; } else { *reinterpret_cast(dst_argb) = - *reinterpret_cast(src_argb); + *reinterpret_cast(src_argb); } } } diff --git a/source/row_posix.cc b/source/row_posix.cc index 06ec5847a..709913ab0 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -105,7 +105,7 @@ CONST uvec8 kShuffleMaskARGBToRGB24 = { // Shuffle table for converting ARGB to RAW. CONST uvec8 kShuffleMaskARGBToRAW = { - 2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u + 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u }; void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { @@ -1728,6 +1728,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, ); } + void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { asm volatile ( diff --git a/source/row_win.cc b/source/row_win.cc index 3b86c2755..06f361e73 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -96,7 +96,7 @@ static const uvec8 kShuffleMaskARGBToRGB24 = { // Shuffle table for converting ARGB to RAW. static const uvec8 kShuffleMaskARGBToRAW = { - 2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u + 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u }; __declspec(naked) @@ -1252,7 +1252,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; __asm psubw xmm1, kUVBiasG \ __asm psubw xmm2, kUVBiasR \ /* Step 2: Find Y contribution to 8 R,G,B values */ \ - __asm movq xmm3, qword ptr [eax] \ + __asm movq xmm3, qword ptr [eax] /* NOLINT */ \ __asm lea eax, [eax + 8] \ __asm punpcklbw xmm3, xmm4 \ __asm psubsw xmm3, kYSub16 \ diff --git a/source/scale.cc b/source/scale.cc index 315ae0ffc..bd9127095 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -269,14 +269,14 @@ static void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr, int src_stride, } #define HAS_SCALEROWDOWN38_NEON -const uint8 shuf38[16] __attribute__ ((aligned(16))) = +const uvec8 shuf38 = { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; -const uint8 shuf38_2[16] __attribute__ ((aligned(16))) = +const uvec8 shuf38_2 = { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 }; -const unsigned short mult38_div6[8] __attribute__ ((aligned(16))) = +const vec16 mult38_div6 = { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; -const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) = +const vec16 mult38_div9 = { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; @@ -566,11 +566,11 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr, */ // Constants for SSE2 code -#elif (defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && \ +#elif defined(_M_IX86) || defined(__i386__) || defined(__x86_64__) && \ !defined(YUV_DISABLE_ASM) #if defined(_MSC_VER) #define TALIGN16(t, var) __declspec(align(16)) t _ ## var -#elif (defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__)) && \ +#elif defined(__APPLE__) || defined(__MINGW32__) || defined(__CYGWIN__) && \ defined(__i386__) #define TALIGN16(t, var) t var __attribute__((aligned(16))) #else @@ -583,7 +583,7 @@ static void ScaleFilterRows_NEON(uint8* dst_ptr, ".private_extern _" #name " \n" \ ".align 4,0x90 \n" \ "_" #name ": \n" -#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__) +#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__) #define DECLARE_FUNCTION(name) \ ".text \n" \ ".align 4,0x90 \n" \ @@ -1547,7 +1547,7 @@ static void ScaleFilterCols34_SSSE3(uint8* dst_ptr, const uint8* src_ptr, } } -#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM) +#elif defined(__x86_64__) || defined(__i386__) && !defined(YUV_DISABLE_ASM) // GCC versions of row functions are verbatim conversions from Visual C. // Generated using gcc disassembly on Visual C object file: @@ -2095,7 +2095,7 @@ extern "C" void ScaleRowDown38_2_Int_SSSE3(const uint8* src_ptr, int src_stride, "popa \n" "ret \n" ); -#endif // __PIC__ +#endif // __PIC__ // Bilinear row filtering combines 16x2 -> 16x1. SSE2 version #define HAS_SCALEFILTERROWS_SSE2 @@ -2910,7 +2910,8 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr, #endif // (1-f)a + fb can be replaced with a + f(b-a) -#define BLENDER(a, b, f) ((int)(a) + ((f) * ((int)(b) - (int)(a)) >> 16)) +#define BLENDER(a, b, f) (static_cast(a) + \ + ((f) * (static_cast(b) - static_cast(a)) >> 16)) static void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) { @@ -3067,24 +3068,22 @@ static void ScalePlaneDown2(int src_width, int src_height, assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); + uint8* dst_ptr, int dst_width) = + filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C; #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { ScaleRowDown2 = filtering ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON; - } else -#endif -#if defined(HAS_SCALEROWDOWN2_SSE2) + } +#elif defined(HAS_SCALEROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { ScaleRowDown2 = filtering ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2; - } else -#endif - { - ScaleRowDown2 = filtering ? ScaleRowDown2Int_C : ScaleRowDown2_C; } +#endif + // TODO(fbarchard): Loop through source height to allow odd height. for (int y = 0; y < dst_height; ++y) { ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); @@ -3107,23 +3106,21 @@ static void ScalePlaneDown4(int src_width, int src_height, assert(IS_ALIGNED(src_width, 4)); assert(IS_ALIGNED(src_height, 4)); void (*ScaleRowDown4)(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); + uint8* dst_ptr, int dst_width) = + filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C; #if defined(HAS_SCALEROWDOWN4_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4)) { ScaleRowDown4 = filtering ? ScaleRowDown4Int_NEON : ScaleRowDown4_NEON; - } else -#endif -#if defined(HAS_SCALEROWDOWN4_SSE2) + } +#elif defined(HAS_SCALEROWDOWN4_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2; - } else -#endif - { - ScaleRowDown4 = filtering ? ScaleRowDown4Int_C : ScaleRowDown4_C; } +#endif + for (int y = 0; y < dst_height; ++y) { ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += (src_stride << 2); @@ -3146,18 +3143,17 @@ static void ScalePlaneDown8(int src_width, int src_height, assert(IS_ALIGNED(src_width, 8)); assert(IS_ALIGNED(src_height, 8)); void (*ScaleRowDown8)(const uint8* src_ptr, int src_stride, - uint8* dst_ptr, int dst_width); + uint8* dst_ptr, int dst_width) = + filtering && (dst_width <= kMaxOutputWidth) ? + ScaleRowDown8Int_C : ScaleRowDown8_C; #if defined(HAS_SCALEROWDOWN8_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { ScaleRowDown8 = filtering ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2; - } else -#endif - { - ScaleRowDown8 = filtering && (dst_width <= kMaxOutputWidth) ? - ScaleRowDown8Int_C : ScaleRowDown8_C; } +#endif + for (int y = 0; y < dst_height; ++y) { ScaleRowDown8(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += (src_stride << 3); @@ -3181,6 +3177,13 @@ static void ScalePlaneDown34(int src_width, int src_height, uint8* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width); + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_C; + ScaleRowDown34_1 = ScaleRowDown34_C; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Int_C; + ScaleRowDown34_1 = ScaleRowDown34_1_Int_C; + } #if defined(HAS_SCALEROWDOWN34_NEON) if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { if (!filtering) { @@ -3190,7 +3193,14 @@ static void ScalePlaneDown34(int src_width, int src_height, ScaleRowDown34_0 = ScaleRowDown34_0_Int_NEON; ScaleRowDown34_1 = ScaleRowDown34_1_Int_NEON; } - } else + } +#endif +#if defined(HAS_SCALEROWDOWN34_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && (dst_width % 24 == 0) && + IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && filtering) { + ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2; + ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2; + } #endif #if defined(HAS_SCALEROWDOWN34_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && @@ -3202,24 +3212,8 @@ static void ScalePlaneDown34(int src_width, int src_height, ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSSE3; } - } else -#endif -#if defined(HAS_SCALEROWDOWN34_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && filtering) { - ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2; - ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2; - } else -#endif - { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_C; - ScaleRowDown34_1 = ScaleRowDown34_C; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Int_C; - ScaleRowDown34_1 = ScaleRowDown34_1_Int_C; - } } +#endif for (int y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width); @@ -3272,6 +3266,13 @@ static void ScalePlaneDown38(int src_width, int src_height, uint8* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint8* src_ptr, int src_stride, uint8* dst_ptr, int dst_width); + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_C; + ScaleRowDown38_2 = ScaleRowDown38_C; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Int_C; + ScaleRowDown38_2 = ScaleRowDown38_2_Int_C; + } #if defined(HAS_SCALEROWDOWN38_NEON) if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { if (!filtering) { @@ -3281,9 +3282,8 @@ static void ScalePlaneDown38(int src_width, int src_height, ScaleRowDown38_3 = ScaleRowDown38_3_Int_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Int_NEON; } - } else -#endif -#if defined(HAS_SCALEROWDOWN38_SSSE3) + } +#elif defined(HAS_SCALEROWDOWN38_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { if (!filtering) { @@ -3293,17 +3293,9 @@ static void ScalePlaneDown38(int src_width, int src_height, ScaleRowDown38_3 = ScaleRowDown38_3_Int_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Int_SSSE3; } - } else -#endif - { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_C; - ScaleRowDown38_2 = ScaleRowDown38_C; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Int_C; - ScaleRowDown38_2 = ScaleRowDown38_2_Int_C; - } } +#endif + for (int y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; @@ -3426,23 +3418,21 @@ static void ScalePlaneBox(int src_width, int src_height, } else { ALIGN16(uint16 row[kMaxInputWidth]); void (*ScaleAddRows)(const uint8* src_ptr, int src_stride, - uint16* dst_ptr, int src_width, int src_height); + uint16* dst_ptr, int src_width, int src_height)= + ScaleAddRows_C; void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint16* src_ptr, uint8* dst_ptr); -#if defined(HAS_SCALEADDROWS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { - ScaleAddRows = ScaleAddRows_SSE2; - } else -#endif - { - ScaleAddRows = ScaleAddRows_C; - } if (dx & 0xffff) { ScaleAddCols = ScaleAddCols2_C; } else { ScaleAddCols = ScaleAddCols1_C; } +#if defined(HAS_SCALEADDROWS_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { + ScaleAddRows = ScaleAddRows_SSE2; + } +#endif for (int j = 0; j < dst_height; ++j) { int iy = y >> 16; @@ -3518,27 +3508,25 @@ void ScalePlaneBilinear(int src_width, int src_height, ALIGN16(uint8 row[kMaxInputWidth + 1]); void (*ScaleFilterRows)(uint8* dst_ptr, const uint8* src_ptr, int src_stride, - int dst_width, int source_y_fraction); + int dst_width, int source_y_fraction) = + ScaleFilterRows_C; #if defined(HAS_SCALEFILTERROWS_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleFilterRows = ScaleFilterRows_NEON; - } else -#endif -#if defined(HAS_SCALEFILTERROWS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { - ScaleFilterRows = ScaleFilterRows_SSSE3; - } else + } #endif #if defined(HAS_SCALEFILTERROWS_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { ScaleFilterRows = ScaleFilterRows_SSE2; - } else -#endif - { - ScaleFilterRows = ScaleFilterRows_C; } +#endif +#if defined(HAS_SCALEFILTERROWS_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { + ScaleFilterRows = ScaleFilterRows_SSSE3; + } +#endif int dx = (src_width << 16) / dst_width; int dy = (src_height << 16) / dst_height; @@ -3645,7 +3633,7 @@ void ScalePlane(const uint8* src, int src_stride, // environment variable overrides for testing. char *filter_override = getenv("LIBYUV_FILTER"); if (filter_override) { - filtering = (FilterMode)atoi(filter_override); + filtering = (FilterMode)atoi(filter_override); // NOLINT } #endif // Use specialized scales to improve performance for common resolutions.