The SSE2 version of scale is suspected of having a bug, as it didn't work in ARGBInterpolate, while the SSSE3 and C versions do. Until the result can be verified, SSE2 is disabled. This will hurt performance on Pentium 4 and older CPUs.

BUG=49
TEST=none
Review URL: https://webrtc-codereview.appspot.com/672009

git-svn-id: http://libyuv.googlecode.com/svn/trunk@301 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2012-07-13 00:55:11 +00:00
parent 0d95d47757
commit 942ad1f5f4
4 changed files with 17 additions and 22 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 300 Version: 301
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -1114,14 +1114,10 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \ #if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
(defined(__x86_64__) || defined(__i386__))) (defined(__x86_64__) || defined(__i386__)))
#define HAS_SCALEARGBFILTERROWS_SSE2
#define HAS_SCALEARGBFILTERROWS_SSSE3 #define HAS_SCALEARGBFILTERROWS_SSSE3
#endif #endif
void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride, void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction);
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width,
int source_y_fraction);
void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
int source_y_fraction); int source_y_fraction);
@ -1143,14 +1139,6 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr, void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
int source_y_fraction) = ScaleARGBFilterRows_C; int source_y_fraction) = ScaleARGBFilterRows_C;
#if defined(HAS_SCALEARGBFILTERROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2;
}
#endif
#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) #if defined(HAS_SCALEARGBFILTERROWS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&

View File

@ -31,7 +31,7 @@ extern "C" {
// NOT the optimized versions. Useful for debugging and // NOT the optimized versions. Useful for debugging and
// when comparing the quality of the resulting YUV planes // when comparing the quality of the resulting YUV planes
// as produced by the optimized and non-optimized versions. // as produced by the optimized and non-optimized versions.
#define SSE2_DISABLED 1
static bool use_reference_impl_ = false; static bool use_reference_impl_ = false;
void SetUseReferenceImpl(bool use) { void SetUseReferenceImpl(bool use) {
@ -1377,12 +1377,13 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
} }
} }
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version. // Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
// Normal formula for bilinear interpolation is: // Normal formula for bilinear interpolation is:
// source_y_fraction * row1 + (1 - source_y_fraction) row0 // source_y_fraction * row1 + (1 - source_y_fraction) row0
// SSE2 version using the a single multiply of difference: // SSE2 version using the a single multiply of difference:
// source_y_fraction * (row1 - row0) + row0 // source_y_fraction * (row1 - row0) + row0
#define HAS_SCALEFILTERROWS_SSE2 #define HAS_SCALEFILTERROWS_SSE2_DISABLED
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
@ -1471,7 +1472,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ret ret
} }
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version. // Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
#define HAS_SCALEFILTERROWS_SSSE3 #define HAS_SCALEFILTERROWS_SSSE3
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
@ -2247,8 +2248,9 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
); );
} }
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version // Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
#define HAS_SCALEFILTERROWS_SSE2 #define HAS_SCALEFILTERROWS_SSE2_DISABLED
static void ScaleFilterRows_SSE2(uint8* dst_ptr, static void ScaleFilterRows_SSE2(uint8* dst_ptr,
const uint8* src_ptr, int src_stride, const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) { int dst_width, int source_y_fraction) {
@ -2318,6 +2320,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
#endif #endif
); );
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version // Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
#define HAS_SCALEFILTERROWS_SSSE3 #define HAS_SCALEFILTERROWS_SSSE3
@ -2388,7 +2391,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
#endif #endif
); );
} }
#endif // defined(__x86_64__) || defined(__i386__) #endif // defined(__x86_64__) || defined(__i386__)
// CPU agnostic row functions // CPU agnostic row functions
@ -2609,7 +2611,7 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
} while (dst_ptr < dend); } while (dst_ptr < dend);
} }
#define HAS_SCALEROWDOWN34_SSE2 #define HAS_SCALEROWDOWN34_SSE2_DISABLED
// Filter rows 0 and 1 together, 3 : 1 // Filter rows 0 and 1 together, 3 : 1
static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride, static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {

View File

@ -28,6 +28,7 @@ extern "C" {
/** /**
* SSE2 downscalers with bilinear interpolation. * SSE2 downscalers with bilinear interpolation.
*/ */
#define SSE2_DISABLED 1
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
@ -182,7 +183,8 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
} }
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version. // Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
#define HAS_SCALEARGBFILTERROWS_SSE2 #ifndef SSE2_DISABLED
#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
@ -267,6 +269,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ret ret
} }
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version. // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
#define HAS_SCALEARGBFILTERROWS_SSSE3 #define HAS_SCALEARGBFILTERROWS_SSSE3
@ -497,8 +500,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
); );
} }
#ifndef SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version // Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
#define HAS_SCALEARGBFILTERROWS_SSE2 #define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int src_stride, int dst_width, int src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
@ -567,6 +571,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
#endif #endif
); );
} }
#endif // SSE2_DISABLED
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
#define HAS_SCALEARGBFILTERROWS_SSSE3 #define HAS_SCALEARGBFILTERROWS_SSSE3
@ -635,7 +640,7 @@ void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
#endif #endif
); );
} }
#endif #endif // defined(__x86_64__) || defined(__i386__)
static void ScaleARGBRowDown2_C(const uint8* src_ptr, int, static void ScaleARGBRowDown2_C(const uint8* src_ptr, int,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {