odd width support for ARGBSobel functions. Improves performance for images that are not a multiple of 8 pixels.

BUG=444
TESTED=libyuvTest.ARGBSobel_Opt
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/54589004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1415 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2015-05-28 22:22:28 +00:00
parent cfce47efc8
commit bd2d903e1b
5 changed files with 77 additions and 27 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1416
Version: 1417
License: BSD
License File: LICENSE

View File

@ -1819,6 +1819,18 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width);
void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width);
void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb, const float* poly,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1416
#define LIBYUV_VERSION 1417
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -2095,13 +2095,19 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) = SobelRow_C;
#if defined(HAS_SOBELROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
SobelRow = SobelRow_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
SobelRow = SobelRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
SobelRow = SobelRow_SSE2;
}
}
#endif
#if defined(HAS_SOBELROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
SobelRow = SobelRow_NEON;
if (TestCpuFlag(kCpuHasNEON)) {
SobelRow = SobelRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
SobelRow = SobelRow_NEON;
}
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
@ -2116,13 +2122,19 @@ int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_, int width) = SobelToPlaneRow_C;
#if defined(HAS_SOBELTOPLANEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
SobelToPlaneRow = SobelToPlaneRow_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
SobelToPlaneRow = SobelToPlaneRow_SSE2;
}
}
#endif
#if defined(HAS_SOBELTOPLANEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
SobelToPlaneRow = SobelToPlaneRow_NEON;
if (TestCpuFlag(kCpuHasNEON)) {
SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SobelToPlaneRow = SobelToPlaneRow_NEON;
}
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
@ -2138,13 +2150,19 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
SobelXYRow = SobelXYRow_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
SobelXYRow = SobelXYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
SobelXYRow = SobelXYRow_SSE2;
}
}
#endif
#if defined(HAS_SOBELXYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
SobelXYRow = SobelXYRow_NEON;
if (TestCpuFlag(kCpuHasNEON)) {
SobelXYRow = SobelXYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
SobelXYRow = SobelXYRow_NEON;
}
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,

View File

@ -558,51 +558,71 @@ MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
#endif
#undef MERGEUVROW_ANY
#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK) \
#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, SBPP, DBPP, MASK) \
void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \
uint8* dst_argb, int width) { \
int n = width & ~MASK; \
if (n > 0) { \
ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \
} \
ARGBMATH_C(src_argb0 + n * 4, \
src_argb1 + n * 4, \
dst_argb + n * 4, \
ARGBMATH_C(src_argb0 + n * SBPP, \
src_argb1 + n * SBPP, \
dst_argb + n * DBPP, \
width & MASK); \
}
#ifdef HAS_ARGBMULTIPLYROW_SSE2
MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
3)
4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_SSE2
MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2
MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
3)
4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2
MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
7)
4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
7)
4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_NEON
MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
7)
4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_NEON
MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_NEON
MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
7)
4, 4, 7)
#endif
#ifdef HAS_SOBELROW_SSE2
MATHROW_ANY(SobelRow_Any_SSE2, SobelRow_SSE2, SobelRow_C, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_NEON
MATHROW_ANY(SobelRow_Any_NEON, SobelRow_NEON, SobelRow_C, 1, 4, 7)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2
MATHROW_ANY(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, SobelToPlaneRow_C,
1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
MATHROW_ANY(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, SobelToPlaneRow_C,
1, 1, 7)
#endif
#ifdef HAS_SOBELXYROW_SSE2
MATHROW_ANY(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, SobelXYRow_C, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
MATHROW_ANY(SobelXYRow_Any_NEON, SobelXYRow_NEON, SobelXYRow_C, 1, 4, 7)
#endif
#undef MATHROW_ANY