diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index bf7a92a84..30e7cb5f9 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -160,12 +160,12 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, uint8* dst_frame, int dst_stride_frame, int width, int height); -// Convert unattentuated ARGB values to preattenuated ARGB. +// Convert unattenuated ARGB to preattenuated ARGB. int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Convert preattentuated ARGB values to unattenuated ARGB. +// Convert preattenuated ARGB to unattenuated ARGB. int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 0e0b2428b..406c97aa6 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -10,7 +10,6 @@ #include "libyuv/planar_functions.h" -#include <stdio.h> // printf() #include <string.h> // for memset() #include "libyuv/cpu_id.h" @@ -144,9 +143,10 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, ARGBBlendRow GetARGBBlend(uint8* dst_argb, int dst_stride_argb, int width) { void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, uint8* dst_argb, int width) = ARGBBlendRow_C; -#if defined(HAS_ARGBBLENDROW_SSE2) +#if defined(HAS_ARGBBLENDROW1_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBBlendRow = ARGBBlendRow1_SSE2; +#if defined(HAS_ARGBBLENDROW_SSE2) if (width >= 4) { ARGBBlendRow = ARGBBlendRow_Any_SSE2; if (IS_ALIGNED(width, 4) && @@ -154,6 +154,7 @@ ARGBBlendRow GetARGBBlend(uint8* dst_argb, int dst_stride_argb, int width) { ARGBBlendRow = ARGBBlendRow_Aligned_SSE2; } } +#endif } #endif #if defined(HAS_ARGBBLENDROW_SSSE3) @@ -864,7 +865,7 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, return 0; } -// Convert unattentuated ARGB values to preattenuated ARGB. 
+// Convert unattenuated ARGB to preattenuated ARGB. // An unattenutated ARGB alpha blend uses the formula // p = a * f + (1 - a) * b // where @@ -910,7 +911,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, return 0; } -// Convert unattentuated ARGB values to preattenuated ARGB. +// Convert preattenuated ARGB to unattenuated ARGB. int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height) { diff --git a/source/row.h b/source/row.h index 504175529..cb22aa554 100644 --- a/source/row.h +++ b/source/row.h @@ -25,50 +25,60 @@ extern "C" { #if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR) #define YUV_DISABLE_ASM #endif +// True if compiling for SSSE3 as a requirement. +#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) +#define LIBYUV_SSSE3_ONLY +#endif // The following are available on all x86 platforms #if !defined(YUV_DISABLE_ASM) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) - #define HAS_ABGRTOARGBROW_SSSE3 -#define HAS_BGRATOARGBROW_SSSE3 -#define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RAWTOARGBROW_SSSE3 -#define HAS_RGB565TOARGBROW_SSE2 +#define HAS_ABGRTOUVROW_SSSE3 +#define HAS_ABGRTOYROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 -#define HAS_ARGBTORGB24ROW_SSSE3 -#define HAS_ARGBTORAWROW_SSSE3 -#define HAS_ARGBTORGB565ROW_SSE2 +#define HAS_ARGBATTENUATE_SSSE3 +#define HAS_ARGBBLENDROW_SSSE3 +#define HAS_ARGBBLENDROW1_SSE2 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 -#define HAS_ARGBTOYROW_SSSE3 -#define HAS_BGRATOYROW_SSSE3 -#define HAS_ABGRTOYROW_SSSE3 +#define HAS_ARGBTORAWROW_SSSE3 +#define HAS_ARGBTORGB24ROW_SSSE3 +#define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTOUVROW_SSSE3 +#define HAS_ARGBTOYROW_SSSE3 +#define HAS_ARGBUNATTENUATE_SSE2 +#define HAS_BGRATOARGBROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 -#define HAS_ABGRTOUVROW_SSSE3 -#define 
HAS_I400TOARGBROW_SSE2 -#define HAS_YTOARGBROW_SSE2 -#define HAS_I420TOARGBROW_SSSE3 -#define HAS_I420TOBGRAROW_SSSE3 -#define HAS_I420TOABGRROW_SSSE3 -#define HAS_I444TOARGBROW_SSSE3 -#define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORROW_SSE2 -#define HAS_MIRRORROWUV_SSSE3 -#define HAS_SPLITUV_SSE2 +#define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_SSE2 #define HAS_COPYROW_X86 -#define HAS_YUY2TOYROW_SSE2 -#define HAS_UYVYTOYROW_SSE2 -#define HAS_YUY2TOUVROW_SSE2 +#define HAS_I400TOARGBROW_SSE2 +#define HAS_I420TOABGRROW_SSSE3 +#define HAS_I420TOARGBROW_SSSE3 +#define HAS_I420TOBGRAROW_SSSE3 +#define HAS_I444TOARGBROW_SSSE3 +#define HAS_MIRRORROW_SSSE3 +#define HAS_MIRRORROWUV_SSSE3 +#define HAS_RAWTOARGBROW_SSSE3 +#define HAS_RGB24TOARGBROW_SSSE3 +#define HAS_RGB565TOARGBROW_SSE2 +#define HAS_SPLITUV_SSE2 #define HAS_UYVYTOUVROW_SSE2 -#define HAS_ARGBBLENDROW_SSE2 -#define HAS_ARGBBLENDROW_SSSE3 +#define HAS_UYVYTOYROW_SSE2 +#define HAS_YTOARGBROW_SSE2 +#define HAS_YUY2TOUVROW_SSE2 +#define HAS_YUY2TOYROW_SSE2 +#endif + +// The following are only useful when SSSE3 is unavailable. 
+#if !defined(YUV_DISABLE_ASM) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ + !defined(LIBYUV_SSSE3_ONLY) +#define HAS_MIRRORROW_SSE2 #define HAS_ARGBATTENUATE_SSE2 -#define HAS_ARGBATTENUATE_SSSE3 -#define HAS_ARGBUNATTENUATE_SSE2 +#define HAS_ARGBBLENDROW_SSE2 #endif // The following are available on Neon platforms diff --git a/source/row_posix.cc b/source/row_posix.cc index 8d25df0d6..2838585ae 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -2105,7 +2105,9 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1, #endif ); } +#endif // HAS_ARGBBLENDROW_SSE2 +#ifdef HAS_ARGBBLENDROW1_SSE2 // Blend 1 pixel at a time, unaligned void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { @@ -2156,7 +2158,7 @@ void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1, #endif ); } -#endif // HAS_ARGBBLENDROW_SSE2 +#endif // HAS_ARGBBLENDROW1_SSE2 #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for reversing the bytes. diff --git a/source/row_win.cc b/source/row_win.cc index 6e9fd8f20..c9f46d4e1 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -2151,7 +2151,9 @@ void ARGBBlendRow_Aligned_SSE2(const uint8* src_argb0, const uint8* src_argb1, ret } } +#endif // HAS_ARGBBLENDROW_SSE2 +#ifdef HAS_ARGBBLENDROW1_SSE2 // Blend 1 pixel at a time, unaligned. __declspec(naked) __declspec(align(16)) void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1, @@ -2202,8 +2204,8 @@ void ARGBBlendRow1_SSE2(const uint8* src_argb0, const uint8* src_argb1, ret } } +#endif // HAS_ARGBBLENDROW1_SSE2 -#endif // HAS_ARGBBLENDROW_SSE2 #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for reversing the bytes. static const uvec8 kShuffleAlpha = {