mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
The SSE2 version of scale is suspected of having a bug, as it didn't work in ARGBInterpolate, while the SSSE3 and C versions do. Until the result can be verified, SSE2 is disabled. This will hurt performance on Pentium 4 and below.
BUG=49 TEST=none Review URL: https://webrtc-codereview.appspot.com/672009 git-svn-id: http://libyuv.googlecode.com/svn/trunk@301 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
0d95d47757
commit
942ad1f5f4
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 300
|
Version: 301
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -1114,14 +1114,10 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
|
|||||||
|
|
||||||
#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
|
#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
|
||||||
(defined(__x86_64__) || defined(__i386__)))
|
(defined(__x86_64__) || defined(__i386__)))
|
||||||
#define HAS_SCALEARGBFILTERROWS_SSE2
|
|
||||||
#define HAS_SCALEARGBFILTERROWS_SSSE3
|
#define HAS_SCALEARGBFILTERROWS_SSSE3
|
||||||
#endif
|
#endif
|
||||||
void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride,
|
void ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, int src_stride,
|
||||||
int dst_width, int source_y_fraction);
|
int dst_width, int source_y_fraction);
|
||||||
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|
||||||
int src_stride, int dst_width,
|
|
||||||
int source_y_fraction);
|
|
||||||
void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
int src_stride, int dst_width,
|
int src_stride, int dst_width,
|
||||||
int source_y_fraction);
|
int source_y_fraction);
|
||||||
@ -1143,14 +1139,6 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
|
|||||||
void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
|
void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
int src_stride, int dst_width,
|
int src_stride, int dst_width,
|
||||||
int source_y_fraction) = ScaleARGBFilterRows_C;
|
int source_y_fraction) = ScaleARGBFilterRows_C;
|
||||||
#if defined(HAS_SCALEARGBFILTERROWS_SSE2)
|
|
||||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
|
||||||
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
|
|
||||||
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
|
|
||||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
|
||||||
ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_SCALEARGBFILTERROWS_SSSE3)
|
#if defined(HAS_SCALEARGBFILTERROWS_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||||
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
|
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
|
||||||
|
|||||||
@ -31,7 +31,7 @@ extern "C" {
|
|||||||
// NOT the optimized versions. Useful for debugging and
|
// NOT the optimized versions. Useful for debugging and
|
||||||
// when comparing the quality of the resulting YUV planes
|
// when comparing the quality of the resulting YUV planes
|
||||||
// as produced by the optimized and non-optimized versions.
|
// as produced by the optimized and non-optimized versions.
|
||||||
|
#define SSE2_DISABLED 1
|
||||||
static bool use_reference_impl_ = false;
|
static bool use_reference_impl_ = false;
|
||||||
|
|
||||||
void SetUseReferenceImpl(bool use) {
|
void SetUseReferenceImpl(bool use) {
|
||||||
@ -1377,12 +1377,13 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef SSE2_DISABLED
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
|
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
|
||||||
// Normal formula for bilinear interpolation is:
|
// Normal formula for bilinear interpolation is:
|
||||||
// source_y_fraction * row1 + (1 - source_y_fraction) * row0
|
// source_y_fraction * row1 + (1 - source_y_fraction) * row0
|
||||||
// SSE2 version using a single multiply of the difference:
|
// SSE2 version using a single multiply of the difference:
|
||||||
// source_y_fraction * (row1 - row0) + row0
|
// source_y_fraction * (row1 - row0) + row0
|
||||||
#define HAS_SCALEFILTERROWS_SSE2
|
#define HAS_SCALEFILTERROWS_SSE2_DISABLED
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
int src_stride, int dst_width,
|
int src_stride, int dst_width,
|
||||||
@ -1471,7 +1472,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif // SSE2_DISABLED
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
|
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
|
||||||
#define HAS_SCALEFILTERROWS_SSSE3
|
#define HAS_SCALEFILTERROWS_SSSE3
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
@ -2247,8 +2248,9 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef SSE2_DISABLED
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
|
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
|
||||||
#define HAS_SCALEFILTERROWS_SSE2
|
#define HAS_SCALEFILTERROWS_SSE2_DISABLED
|
||||||
static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
||||||
const uint8* src_ptr, int src_stride,
|
const uint8* src_ptr, int src_stride,
|
||||||
int dst_width, int source_y_fraction) {
|
int dst_width, int source_y_fraction) {
|
||||||
@ -2318,6 +2320,7 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
#endif // SSE2_DISABLED
|
||||||
|
|
||||||
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
|
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
|
||||||
#define HAS_SCALEFILTERROWS_SSSE3
|
#define HAS_SCALEFILTERROWS_SSSE3
|
||||||
@ -2388,7 +2391,6 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // defined(__x86_64__) || defined(__i386__)
|
#endif // defined(__x86_64__) || defined(__i386__)
|
||||||
|
|
||||||
// CPU agnostic row functions
|
// CPU agnostic row functions
|
||||||
@ -2609,7 +2611,7 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
} while (dst_ptr < dend);
|
} while (dst_ptr < dend);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HAS_SCALEROWDOWN34_SSE2
|
#define HAS_SCALEROWDOWN34_SSE2_DISABLED
|
||||||
// Filter rows 0 and 1 together, 3 : 1
|
// Filter rows 0 and 1 together, 3 : 1
|
||||||
static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride,
|
static void ScaleRowDown34_0_Int_SSE2(const uint8* src_ptr, int src_stride,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
|
|||||||
@ -28,6 +28,7 @@ extern "C" {
|
|||||||
/**
|
/**
|
||||||
* SSE2 downscalers with bilinear interpolation.
|
* SSE2 downscalers with bilinear interpolation.
|
||||||
*/
|
*/
|
||||||
|
#define SSE2_DISABLED 1
|
||||||
|
|
||||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||||
|
|
||||||
@ -182,7 +183,8 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
|
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
|
||||||
#define HAS_SCALEARGBFILTERROWS_SSE2
|
#ifndef SSE2_DISABLED
|
||||||
|
#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
int src_stride, int dst_width,
|
int src_stride, int dst_width,
|
||||||
@ -267,6 +269,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif // SSE2_DISABLED
|
||||||
|
|
||||||
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
|
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
|
||||||
#define HAS_SCALEARGBFILTERROWS_SSSE3
|
#define HAS_SCALEARGBFILTERROWS_SSSE3
|
||||||
@ -497,8 +500,9 @@ static void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, int src_stride,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef SSE2_DISABLED
|
||||||
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
|
// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
|
||||||
#define HAS_SCALEARGBFILTERROWS_SSE2
|
#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
|
||||||
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||||
int src_stride, int dst_width,
|
int src_stride, int dst_width,
|
||||||
int source_y_fraction) {
|
int source_y_fraction) {
|
||||||
@ -567,6 +571,7 @@ void ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
#endif // SSE2_DISABLED
|
||||||
|
|
||||||
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
|
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
|
||||||
#define HAS_SCALEARGBFILTERROWS_SSSE3
|
#define HAS_SCALEARGBFILTERROWS_SSSE3
|
||||||
@ -635,7 +640,7 @@ void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif
|
#endif // defined(__x86_64__) || defined(__i386__)
|
||||||
|
|
||||||
static void ScaleARGBRowDown2_C(const uint8* src_ptr, int,
|
static void ScaleARGBRowDown2_C(const uint8* src_ptr, int,
|
||||||
uint8* dst_ptr, int dst_width) {
|
uint8* dst_ptr, int dst_width) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user